Move JIT code allocation to JitMemoryRegion.
So that JitCodeCache does not need to know about the dual view mapping.
Test: m
Change-Id: I19d2e8e69eef07648803937fff928aa9b45ac5a2
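
For context, the caller-visible contract after this change is small. The sketch below matches the new call sites in the diff, but is illustrative rather than a quote of the tree:

    // JitCodeCache now only asks the region for an executable pointer to the
    // copied code and reads the header back; all dual-view handling and cache
    // maintenance lives in JitMemoryRegion::AllocateCode (jit_memory_region.cc).
    const uint8_t* code_ptr = region->AllocateCode(
        code, code_size, stack_map, has_should_deoptimize_flag);
    if (code_ptr == nullptr) {
      return nullptr;  // Allocation failed.
    }
    OatQuickMethodHeader* method_header =
        OatQuickMethodHeader::FromCodePointer(code_ptr);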
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 100b399..300225e 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -372,17 +372,6 @@
return in_collection;
}
-static size_t GetJitCodeAlignment() {
- if (kRuntimeISA == InstructionSet::kArm || kRuntimeISA == InstructionSet::kThumb2) {
- // Some devices with 32-bit ARM kernels need additional JIT code alignment when using dual
- // view JIT (b/132205399). The alignment returned here coincides with the typical ARM d-cache
- // line (though the value should be probed ideally). Both the method header and code in the
- // cache are aligned to this size.
- return 64;
- }
- return GetInstructionSetAlignment(kRuntimeISA);
-}
-
static uintptr_t FromCodeToAllocation(const void* code) {
size_t alignment = GetJitCodeAlignment();
return reinterpret_cast<uintptr_t>(code) - RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -704,85 +693,18 @@
DCheckRootsAreValid(roots);
}
- OatQuickMethodHeader* method_header = nullptr;
- uint8_t* code_ptr = nullptr;
-
MutexLock mu(self, *Locks::jit_lock_);
// We need to make sure that there will be no jit-gcs going on and wait for any ongoing one to
// finish.
WaitForPotentialCollectionToCompleteRunnable(self);
- {
- ScopedCodeCacheWrite scc(*region);
-
- size_t alignment = GetJitCodeAlignment();
- // Ensure the header ends up at expected instruction alignment.
- size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
- size_t total_size = header_size + code_size;
-
- // AllocateCode allocates memory in non-executable region for alignment header and code. The
- // header size may include alignment padding.
- uint8_t* nox_memory = region->AllocateCode(total_size, alignment);
- if (nox_memory == nullptr) {
- return nullptr;
- }
-
- // code_ptr points to non-executable code.
- code_ptr = nox_memory + header_size;
- std::copy(code, code + code_size, code_ptr);
- method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-
- // From here code_ptr points to executable code.
- code_ptr = region->GetExecutableAddress(code_ptr);
-
- new (method_header) OatQuickMethodHeader(
- (stack_map != nullptr) ? code_ptr - stack_map : 0u,
- code_size);
-
- DCHECK(!Runtime::Current()->IsAotCompiler());
- if (has_should_deoptimize_flag) {
- method_header->SetHasShouldDeoptimizeFlag();
- }
-
- // Update method_header pointer to executable code region.
- method_header = region->GetExecutableAddress(method_header);
-
- // Both instruction and data caches need flushing to the point of unification where both share
- // a common view of memory. Flushing the data cache ensures the dirty cachelines from the
- // newly added code are written out to the point of unification. Flushing the instruction
- // cache ensures the newly written code will be fetched from the point of unification before
- // use. Memory in the code cache is re-cycled as code is added and removed. The flushes
- // prevent stale code from residing in the instruction cache.
- //
- // Caches are flushed before write permission is removed because some ARMv8 Qualcomm kernels
- // may trigger a segfault if a page fault occurs when requesting a cache maintenance
- // operation. This is a kernel bug that we need to work around until affected devices
- // (e.g. Nexus 5X and 6P) stop being supported or their kernels are fixed.
- //
- // For reference, this behavior is caused by this commit:
- // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
- //
- if (region->HasDualCodeMapping()) {
- // Flush the data cache lines associated with the non-executable copy of the code just added.
- FlushDataCache(nox_memory, nox_memory + total_size);
- }
- // FlushInstructionCache() flushes both data and instruction caches lines. The cacheline range
- // flushed is for the executable mapping of the code just added.
- uint8_t* x_memory = reinterpret_cast<uint8_t*>(method_header);
- FlushInstructionCache(x_memory, x_memory + total_size);
-
- // Ensure CPU instruction pipelines are flushed for all cores. This is necessary for
- // correctness as code may still be in instruction pipelines despite the i-cache flush. It is
- // not safe to assume that changing permissions with mprotect (RX->RWX->RX) will cause a TLB
- // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to
- // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have
- // hardware support that broadcasts TLB invalidations and so their kernels have no software
- // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to
- // address this (see mbarrier(2)). The membarrier here will fail on prior kernels and on
- // platforms lacking the appropriate support.
- art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore);
-
- number_of_compilations_++;
+ const uint8_t* code_ptr = region->AllocateCode(
+ code, code_size, stack_map, has_should_deoptimize_flag);
+ if (code_ptr == nullptr) {
+ return nullptr;
}
+ OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+
+ number_of_compilations_++;
// We need to update the entry point in the runnable state for the instrumentation.
{
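
The header placement that FromCodeToAllocation and OatQuickMethodHeader::FromCodePointer rely on is pure alignment arithmetic. A minimal standalone sketch, assuming hypothetical constants (only the RoundUp relationship mirrors the real code):

    #include <cstdint>

    // Mirrors art's RoundUp for power-of-two alignments (base/bit_utils.h).
    constexpr uintptr_t RoundUp(uintptr_t x, uintptr_t alignment) {
      return (x + alignment - 1) & ~(alignment - 1);
    }

    int main() {
      constexpr uintptr_t kAlignment = 64;   // GetJitCodeAlignment() on 32-bit ARM.
      constexpr uintptr_t kHeaderSize = 24;  // Hypothetical sizeof(OatQuickMethodHeader).
      // Layout of one allocation: [padding][OatQuickMethodHeader][code...].
      uintptr_t header_size = RoundUp(kHeaderSize, kAlignment);  // 64, so code stays aligned.
      uintptr_t allocation = 0x10000;  // From mspace_memalign, already aligned.
      uintptr_t code_ptr = allocation + header_size;
      // FromCodeToAllocation inverts the layout: code minus rounded header size.
      return (code_ptr - RoundUp(kHeaderSize, kAlignment)) == allocation ? 0 : 1;
    }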
diff --git a/runtime/jit/jit_memory_region.cc b/runtime/jit/jit_memory_region.cc
index ab9a817..c250d6b 100644
--- a/runtime/jit/jit_memory_region.cc
+++ b/runtime/jit/jit_memory_region.cc
@@ -23,6 +23,7 @@
#include "base/bit_utils.h" // For RoundDown, RoundUp
#include "base/globals.h"
#include "base/logging.h" // For VLOG.
+#include "base/membarrier.h"
#include "base/memfd.h"
#include "base/systrace.h"
#include "gc/allocator/dlmalloc.h"
@@ -296,24 +297,87 @@
}
}
-uint8_t* JitMemoryRegion::AllocateCode(size_t code_size, size_t alignment) {
- // Each allocation should be on its own set of cache lines.
- // `code_size` covers the OatQuickMethodHeader, the JIT generated machine code,
- // and any alignment padding.
- size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
- DCHECK_GT(code_size, header_size);
- uint8_t* result = reinterpret_cast<uint8_t*>(
- mspace_memalign(exec_mspace_, alignment, code_size));
+const uint8_t* JitMemoryRegion::AllocateCode(const uint8_t* code,
+ size_t code_size,
+ const uint8_t* stack_map,
+ bool has_should_deoptimize_flag) {
+ ScopedCodeCacheWrite scc(*this);
+
+ size_t alignment = GetJitCodeAlignment();
// Ensure the header ends up at expected instruction alignment.
- DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
- used_memory_for_code_ += mspace_usable_size(result);
+ size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+ size_t total_size = header_size + code_size;
+
+ // Each allocation should be on its own set of cache lines.
+ // `total_size` covers the OatQuickMethodHeader, the JIT generated machine code,
+ // and any alignment padding.
+ DCHECK_GT(total_size, header_size);
+ uint8_t* w_memory = reinterpret_cast<uint8_t*>(
+ mspace_memalign(exec_mspace_, alignment, total_size));
+ if (w_memory == nullptr) {
+ return nullptr;
+ }
+ uint8_t* x_memory = GetExecutableAddress(w_memory);
+ // Ensure the header ends up at expected instruction alignment.
+ DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(w_memory + header_size), alignment);
+ used_memory_for_code_ += mspace_usable_size(w_memory);
+ const uint8_t* result = x_memory + header_size;
+
+ // Write the code.
+ std::copy(code, code + code_size, w_memory + header_size);
+
+ // Write the header.
+ OatQuickMethodHeader* method_header =
+ OatQuickMethodHeader::FromCodePointer(w_memory + header_size);
+ new (method_header) OatQuickMethodHeader(
+ (stack_map != nullptr) ? result - stack_map : 0u,
+ code_size);
+ if (has_should_deoptimize_flag) {
+ method_header->SetHasShouldDeoptimizeFlag();
+ }
+
+ // Both instruction and data caches need flushing to the point of unification where both share
+ // a common view of memory. Flushing the data cache ensures the dirty cachelines from the
+ // newly added code are written out to the point of unification. Flushing the instruction
+ // cache ensures the newly written code will be fetched from the point of unification before
+ // use. Memory in the code cache is re-cycled as code is added and removed. The flushes
+ // prevent stale code from residing in the instruction cache.
+ //
+ // Caches are flushed before write permission is removed because some ARMv8 Qualcomm kernels
+ // may trigger a segfault if a page fault occurs when requesting a cache maintenance
+ // operation. This is a kernel bug that we need to work around until affected devices
+ // (e.g. Nexus 5X and 6P) stop being supported or their kernels are fixed.
+ //
+ // For reference, this behavior is caused by this commit:
+ // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
+ //
+ if (HasDualCodeMapping()) {
+ // Flush the data cache lines associated with the non-executable copy of the code just added.
+ FlushDataCache(w_memory, w_memory + total_size);
+ }
+
+ // FlushInstructionCache() flushes both data and instruction cache lines. The cacheline range
+ // flushed is for the executable mapping of the code just added.
+ FlushInstructionCache(x_memory, x_memory + total_size);
+
+ // Ensure CPU instruction pipelines are flushed for all cores. This is necessary for
+ // correctness as code may still be in instruction pipelines despite the i-cache flush. It is
+ // not safe to assume that changing permissions with mprotect (RX->RWX->RX) will cause a TLB
+ // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to
+ // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have
+ // hardware support that broadcasts TLB invalidations and so their kernels have no software
+ // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to
+ // address this (see membarrier(2)). The membarrier here will fail on prior kernels and on
+ // platforms lacking the appropriate support.
+ art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore);
+
return result;
}
-void JitMemoryRegion::FreeCode(uint8_t* code) {
+void JitMemoryRegion::FreeCode(const uint8_t* code) {
code = GetNonExecutableAddress(code);
used_memory_for_code_ -= mspace_usable_size(code);
- mspace_free(exec_mspace_, code);
+ mspace_free(exec_mspace_, const_cast<uint8_t*>(code));
}
uint8_t* JitMemoryRegion::AllocateData(size_t data_size) {
diff --git a/runtime/jit/jit_memory_region.h b/runtime/jit/jit_memory_region.h
index dda9fc2..8885ee8 100644
--- a/runtime/jit/jit_memory_region.h
+++ b/runtime/jit/jit_memory_region.h
@@ -19,6 +19,7 @@
#include <string>
+#include "arch/instruction_set.h"
#include "base/globals.h"
#include "base/locks.h"
#include "base/mem_map.h"
@@ -32,6 +33,17 @@
// architectures.
static constexpr int kJitCodeAccountingBytes = 16;
+inline size_t GetJitCodeAlignment() {
+ if (kRuntimeISA == InstructionSet::kArm || kRuntimeISA == InstructionSet::kThumb2) {
+ // Some devices with 32-bit ARM kernels need additional JIT code alignment when using dual
+ // view JIT (b/132205399). The alignment returned here coincides with the typical ARM d-cache
+ // line size (ideally the value would be probed from the hardware). Both the method header
+ // and code in the cache are aligned to this size.
+ return 64;
+ }
+ return GetInstructionSetAlignment(kRuntimeISA);
+}
+
// Represents a memory region for the JIT, where code and data are stored. This class
// provides allocation and deallocation primitives.
class JitMemoryRegion {
@@ -62,8 +74,15 @@
// Set the footprint limit of the code cache.
void SetFootprintLimit(size_t new_footprint) REQUIRES(Locks::jit_lock_);
- uint8_t* AllocateCode(size_t code_size, size_t alignment) REQUIRES(Locks::jit_lock_);
- void FreeCode(uint8_t* code) REQUIRES(Locks::jit_lock_);
+
+ // Copy the code into the region, and allocate an OatQuickMethodHeader.
+ // Callers should not write into the returned memory, as it may be read-only.
+ const uint8_t* AllocateCode(const uint8_t* code,
+ size_t code_size,
+ const uint8_t* stack_map,
+ bool has_should_deoptimize_flag)
+ REQUIRES(Locks::jit_lock_);
+ void FreeCode(const uint8_t* code) REQUIRES(Locks::jit_lock_);
uint8_t* AllocateData(size_t data_size) REQUIRES(Locks::jit_lock_);
void FreeData(uint8_t* data) REQUIRES(Locks::jit_lock_);
@@ -83,28 +102,10 @@
return exec_pages_.HasAddress(ptr);
}
- const MemMap* GetUpdatableCodeMapping() const {
- if (HasDualCodeMapping()) {
- return &non_exec_pages_;
- } else if (HasCodeMapping()) {
- return &exec_pages_;
- } else {
- return nullptr;
- }
- }
-
const MemMap* GetExecPages() const {
return &exec_pages_;
}
- template <typename T> T* GetExecutableAddress(T* src_ptr) {
- return TranslateAddress(src_ptr, non_exec_pages_, exec_pages_);
- }
-
- template <typename T> T* GetNonExecutableAddress(T* src_ptr) {
- return TranslateAddress(src_ptr, exec_pages_, non_exec_pages_);
- }
-
void* MoreCore(const void* mspace, intptr_t increment);
bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS {
@@ -133,11 +134,29 @@
if (!HasDualCodeMapping()) {
return src_ptr;
}
- CHECK(src.HasAddress(src_ptr));
- uint8_t* const raw_src_ptr = reinterpret_cast<uint8_t*>(src_ptr);
+ CHECK(src.HasAddress(src_ptr)) << reinterpret_cast<const void*>(src_ptr);
+ const uint8_t* const raw_src_ptr = reinterpret_cast<const uint8_t*>(src_ptr);
return reinterpret_cast<T*>(raw_src_ptr - src.Begin() + dst.Begin());
}
+ const MemMap* GetUpdatableCodeMapping() const {
+ if (HasDualCodeMapping()) {
+ return &non_exec_pages_;
+ } else if (HasCodeMapping()) {
+ return &exec_pages_;
+ } else {
+ return nullptr;
+ }
+ }
+
+ template <typename T> T* GetExecutableAddress(T* src_ptr) {
+ return TranslateAddress(src_ptr, non_exec_pages_, exec_pages_);
+ }
+
+ template <typename T> T* GetNonExecutableAddress(T* src_ptr) {
+ return TranslateAddress(src_ptr, exec_pages_, non_exec_pages_);
+ }
+
static int CreateZygoteMemory(size_t capacity, std::string* error_msg);
static bool ProtectZygoteMemory(int fd, std::string* error_msg);
@@ -178,6 +197,7 @@
// The opaque mspace for allocating code.
void* exec_mspace_ GUARDED_BY(Locks::jit_lock_);
+ friend class ScopedCodeCacheWrite; // For GetUpdatableCodeMapping
friend class TestZygoteMemory;
};
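
Finally, the TranslateAddress helper that GetExecutableAddress and GetNonExecutableAddress wrap is plain same-offset arithmetic between the two mappings. A hedged standalone rendering, with MemMap replaced by a toy struct (the real helper additionally returns src_ptr unchanged when there is no dual mapping):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Toy stand-in for art::MemMap, for illustration only.
    struct Mapping {
      uintptr_t begin;
      size_t size;
      bool HasAddress(uintptr_t p) const { return p >= begin && p < begin + size; }
    };

    // Same byte offset, other view; mirrors JitMemoryRegion::TranslateAddress.
    uintptr_t Translate(uintptr_t src_ptr, const Mapping& src, const Mapping& dst) {
      assert(src.HasAddress(src_ptr));  // The real code CHECKs and logs the pointer.
      return src_ptr - src.begin + dst.begin;
    }

    int main() {
      Mapping non_exec{0x70000000, 0x10000};
      Mapping exec{0x78000000, 0x10000};
      uintptr_t w = non_exec.begin + 0x1240;       // Address in the RW view.
      uintptr_t x = Translate(w, non_exec, exec);  // GetExecutableAddress analogue.
      assert(Translate(x, exec, non_exec) == w);   // Round-trips.
      return 0;
    }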