Assembly TLAB allocation fast path for x86_64.
TODO: add fast paths for the resolved/initialized entrypoint variants and for other architectures.
Bug: 9986565
Change-Id: If6df3449a3b2f5074d11babdda0fd2791fd54946
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0f874a4..0629369 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -883,7 +883,44 @@
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+DEFINE_FUNCTION art_quick_alloc_object_tlab
+ // Fast path TLAB allocation: bump-allocate from the thread-local buffer when possible.
+ // RDI: uint32_t type_idx, RSI: ArtMethod* (referrer; its dex cache resolves type_idx)
+ // RDX, RCX, R8, R9: scratch. RAX: return value (new object; slow path sets it otherwise).
+ movl MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx // edx = method's dex-cache resolved-types array (32-bit heap ref).
+ // Load the class: edx = resolved_types[type_idx]; null when the type is unresolved.
+ movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdx, %rdi, MIRROR_OBJECT_ARRAY_COMPONENT_SIZE), %edx
+ testl %edx, %edx // Unresolved class: take the slow path.
+ jz .Lart_quick_alloc_object_tlab_slow_path
+ // Check class status: must be fully initialized, else <clinit> may still need to run.
+ cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+ jne .Lart_quick_alloc_object_tlab_slow_path
+ // Check access flags for kAccClassIsFinalizable: finalizable objects need extra registration.
+ testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+ jnz .Lart_quick_alloc_object_tlab_slow_path
+ movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx // ecx = instance size in bytes.
+ addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx // Round size up to 8 bytes: (size + 7) & ~7.
+ andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx // Clear low bits to complete the round-up.
+ movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = Thread::Current() via GS-based TLS.
+ movq THREAD_LOCAL_POS_OFFSET(%r8), %rax // rax = thread_local_pos; the result if we succeed.
+ addq %rax, %rcx // rcx = candidate new thread_local_pos (pos + size).
+ cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx // Unsigned compare against thread_local_end.
+ ja .Lart_quick_alloc_object_tlab_slow_path // Does not fit in the remaining TLAB space.
+ movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8) // Commit the bump: update thread_local_pos.
+ addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8) // Increment thread_local_objects.
+ // Store the class pointer in the header, publishing the object.
+ // No fence needed for x86: x86-TSO does not reorder stores with earlier stores.
+ movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+ ret // Fast path succeeded; rax holds the new object.
+.Lart_quick_alloc_object_tlab_slow_path:
+ SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC
+ // Outgoing argument set up; rdi (type_idx) / rsi (method) assumed untouched by the frame setup — confirm macro.
+ movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current()
+ call artAllocObjectFromCodeTLAB // artAllocObjectFromCodeTLAB(type_idx, method, Thread*)
+ RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+ RETURN_IF_RESULT_IS_NON_ZERO // return or deliver exception
+END_FUNCTION art_quick_alloc_object_tlab
+
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 92f4ebe..b1dbf6f 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -103,6 +103,16 @@
ADD_TEST_EQ(THREAD_SELF_OFFSET,
art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 125 * __SIZEOF_POINTER__)  // tlsPtr_.thread_local_pos: TLAB bump pointer.
+ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
+ art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
+#define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)  // tlsPtr_.thread_local_end: TLAB limit.
+ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
+ art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)  // tlsPtr_.thread_local_objects: TLAB alloc count.
+ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
+ art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+
// Offsets within java.lang.Object.
#define MIRROR_OBJECT_CLASS_OFFSET 0
ADD_TEST_EQ(MIRROR_OBJECT_CLASS_OFFSET, art::mirror::Object::ClassOffset().Int32Value())
@@ -120,6 +130,22 @@
#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
art::mirror::Class::ComponentTypeOffset().Int32Value())
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (52 + MIRROR_OBJECT_HEADER_SIZE)  // mirror::Class::access_flags_ (checked below).
+ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
+ art::mirror::Class::AccessFlagsOffset().Int32Value())
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (80 + MIRROR_OBJECT_HEADER_SIZE)  // mirror::Class::object_size_ (checked below).
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
+ art::mirror::Class::ObjectSizeOffset().Int32Value())
+#define MIRROR_CLASS_STATUS_OFFSET (92 + MIRROR_OBJECT_HEADER_SIZE)  // mirror::Class status field (checked below).
+ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
+ art::mirror::Class::StatusOffset().Int32Value())
+
+#define MIRROR_CLASS_STATUS_INITIALIZED 10  // Must equal mirror::Class::kStatusInitialized (checked below).
+ADD_TEST_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED),
+ static_cast<uint32_t>(art::mirror::Class::kStatusInitialized))
+#define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000  // Must equal kAccClassIsFinalizable; sign bit, so testable in one testl.
+ADD_TEST_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE),
+ static_cast<uint32_t>(kAccClassIsFinalizable))
// Array offsets.
#define MIRROR_ARRAY_LENGTH_OFFSET MIRROR_OBJECT_HEADER_SIZE
@@ -134,6 +160,10 @@
art::mirror::Array::DataOffset(
sizeof(art::mirror::HeapReference<art::mirror::Object>)).Int32Value())
+#define MIRROR_OBJECT_ARRAY_COMPONENT_SIZE 4  // sizeof(mirror::HeapReference<mirror::Object>) (checked below).
+ADD_TEST_EQ(static_cast<size_t>(MIRROR_OBJECT_ARRAY_COMPONENT_SIZE),
+ sizeof(art::mirror::HeapReference<art::mirror::Object>))
+
// Offsets within java.lang.String.
#define MIRROR_STRING_VALUE_OFFSET MIRROR_OBJECT_HEADER_SIZE
ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
@@ -149,6 +179,10 @@
ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET,
art::mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())
+#define MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)  // ArtMethod's resolved-types cache field (checked below).
+ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_TYPES_OFFSET,
+ art::mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value())
+
#define MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32 (36 + MIRROR_OBJECT_HEADER_SIZE)
ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32,
art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
@@ -178,6 +212,13 @@
#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne))
+#define OBJECT_ALIGNMENT_MASK 7  // kObjectAlignment - 1: objects are 8-byte aligned.
+ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment - 1)
+
+#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xFFFFFFF8  // ~OBJECT_ALIGNMENT_MASK as a 32-bit value, for andl.
+ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
+ ~static_cast<uint32_t>(art::kObjectAlignment - 1))
+
#if defined(__cplusplus)
} // End of CheckAsmSupportOffsets.
#endif
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 97052f1..c368dc6 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -45,7 +45,7 @@
bool is_variable_size = IsVariableSize<kVerifyFlags, kReadBarrierOption>();
CHECK(!is_variable_size) << " class=" << PrettyTypeOf(this);
}
- return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_));
+ return GetField32(ObjectSizeOffset());
}
inline Class* Class::GetSuperClass() {
@@ -523,7 +523,7 @@
<< " IsArtField=" << (this == ArtField::GetJavaLangReflectArtField())
<< " IsArtMethod=" << (this == ArtMethod::GetJavaLangReflectArtMethod())
<< " descriptor=" << PrettyDescriptor(this);
- return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+ return GetField32<kVerifyFlags>(AccessFlagsOffset());
}
inline String* Class::GetName() {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index e7f7c6e..2dff383 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -204,6 +204,9 @@
template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ static MemberOffset AccessFlagsOffset() {  // Offset of access_flags_; exposed for asm_support.h offset checks.
+ return OFFSET_OF_OBJECT_MEMBER(Class, access_flags_);
+ }
void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -527,6 +530,9 @@
template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ static MemberOffset ObjectSizeOffset() {  // Offset of object_size_; exposed for asm_support.h offset checks.
+ return OFFSET_OF_OBJECT_MEMBER(Class, object_size_);
+ }
void SetObjectSize(uint32_t new_object_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
DCHECK(!IsVariableSize());
diff --git a/runtime/thread.h b/runtime/thread.h
index da7af83..9d4d89d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -573,6 +573,21 @@
OFFSETOF_MEMBER(tls_ptr_sized_values, suspend_trigger));
}
+ template<size_t pointer_size>  // Offset of tls_ptr_sized_values.thread_local_pos (TLAB bump pointer), for assembly fast paths.
+ static ThreadOffset<pointer_size> ThreadLocalPosOffset() {
+ return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_pos));
+ }
+
+ template<size_t pointer_size>  // Offset of tls_ptr_sized_values.thread_local_end (TLAB limit), for assembly fast paths.
+ static ThreadOffset<pointer_size> ThreadLocalEndOffset() {
+ return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_end));
+ }
+
+ template<size_t pointer_size>  // Offset of tls_ptr_sized_values.thread_local_objects (TLAB allocation count), for assembly fast paths.
+ static ThreadOffset<pointer_size> ThreadLocalObjectsOffset() {
+ return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_objects));
+ }
+
// Size of stack less any space reserved for stack overflow
size_t GetStackSize() const {
return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);