Remove the fast path in art_quick_resolve_string.
The stub is now used only in very rare cases. A follow-up
change will remove the string dex cache from java.lang.Class.
Performance regression is 0% on x86/x86-64/arm/arm64.
Some performance regression is expected on mips (which always calls
the stub).
Test: test-art-host test-art-target
Change-Id: I1f410924ef2f7d82eb3b39b4d52c283825306d2a
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 550f8c7..239fbad 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1109,62 +1109,7 @@
*/
ENTRY art_quick_resolve_string
- push {r10-r12, lr}
- .cfi_adjust_cfa_offset 16
- .cfi_rel_offset r10, 0
- .cfi_rel_offset r11, 4
- .cfi_rel_offset ip, 8
- .cfi_rel_offset lr, 12
- ldr r10, [sp, #16] @ load referrer
- ldr r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class
- ldr r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
- ubfx r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS
- add r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
- ldrd r10, r11, [r10] @ load index into r11 and pointer into r10
- cmp r0, r11
- bne .Lart_quick_resolve_string_slow_path
-#ifdef USE_READ_BARRIER
- ldr r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
- cbnz r0, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
- mov r0, r10
- pop {r10-r12, pc}
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
- ldr r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET]
- lsrs r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1)
- bcs .Lart_quick_resolve_string_no_rb
- mov r0, r10
- .cfi_remember_state
- pop {r10-r12, lr}
- .cfi_adjust_cfa_offset -16
- .cfi_restore r10
- .cfi_restore r11
- .cfi_restore r12
- .cfi_restore lr
- // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not.
- b .Lslow_rb_art_quick_read_barrier_mark_reg00 @ Get the marked string back.
- .cfi_restore_state
-#endif
-
-// Slow path case, the index did not match
-.Lart_quick_resolve_string_slow_path:
- push {r0-r9} @ 10 words of callee saves and args; {r10-r12, lr} already saved.
- .cfi_adjust_cfa_offset 40
- .cfi_rel_offset r0, 0
- .cfi_rel_offset r1, 4
- .cfi_rel_offset r2, 8
- .cfi_rel_offset r3, 12
- .cfi_rel_offset r4, 16
- .cfi_rel_offset r5, 20
- .cfi_rel_offset r6, 24
- .cfi_rel_offset r7, 28
- .cfi_rel_offset r8, 32
- .cfi_rel_offset r9, 36
- SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1 @ save callee saves in case of GC
+ SETUP_SAVE_EVERYTHING_FRAME r1 @ save everything in case of GC
mov r1, r9 @ pass Thread::Current
bl artResolveStringFromCode @ (uint32_t type_idx, Thread*)
cbz r0, 1f @ If result is null, deliver the OOME.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d8ebe26..a7a3dde 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1651,44 +1651,7 @@
*/
ENTRY art_quick_resolve_string
- SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
- ldr x29, [sp, #(2 * __SIZEOF_POINTER__)] // load referrer
- ldr w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class
- ldr x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache
- ubfx lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS // get masked string index into LR
- ldr x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x29
- cmp x0, x29, lsr #32 // compare against upper 32 bits
- bne .Lart_quick_resolve_string_slow_path
- ubfx x0, x29, #0, #32 // extract lower 32 bits into x0
-#ifdef USE_READ_BARRIER
- // Most common case: GC is not marking.
- ldr w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
- cbnz x29, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
- .cfi_remember_state
- RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
- ret
- .cfi_restore_state
- .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
- ldr x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- tbnz x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
- .cfi_remember_state
- RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
- // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not.
- b .Lslow_rb_art_quick_read_barrier_mark_reg00 // Get the marked string back.
- .cfi_restore_state
- .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598
-#endif
-
-// Slow path case, the index did not match.
-.Lart_quick_resolve_string_slow_path:
- INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)
- SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR // save callee saves in case of GC
+ SETUP_SAVE_EVERYTHING_FRAME // save everything for stack crawl
mov x1, xSELF // pass Thread::Current
bl artResolveStringFromCode // (int32_t string_idx, Thread* self)
cbz w0, 1f // If result is null, deliver the OOME.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 635bfa3..46a6cdb 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1151,51 +1151,17 @@
END_FUNCTION art_quick_alloc_object_region_tlab
DEFINE_FUNCTION art_quick_resolve_string
- PUSH edi
- PUSH esi
- // Save xmm0 at an aligned address on the stack.
- subl MACRO_LITERAL(12), %esp
- CFI_ADJUST_CFA_OFFSET(12)
- movsd %xmm0, 0(%esp)
- movl 24(%esp), %edi // get referrer
- movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi // get declaring class
- movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi // get string dex cache
- movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi
- andl %eax, %esi
- movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0 // load string idx and ptr to xmm0
- movd %xmm0, %edi // extract pointer
- pshufd LITERAL(0x55), %xmm0, %xmm0 // shuffle index into lowest bits
- movd %xmm0, %esi // extract index
- // Restore xmm0 and remove it together with padding from the stack.
- movsd 0(%esp), %xmm0
- addl MACRO_LITERAL(12), %esp
- CFI_ADJUST_CFA_OFFSET(-12)
- cmp %esi, %eax
- jne .Lart_quick_resolve_string_slow_path
- movl %edi, %eax
- CFI_REMEMBER_STATE
- POP esi
- POP edi
-#ifdef USE_READ_BARRIER
- cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
- ret
- CFI_RESTORE_STATE
- CFI_DEF_CFA(esp, 24) // workaround for clang bug: 31975598
-
-.Lart_quick_resolve_string_slow_path:
+ SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
// Outgoing argument set up
- SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx
- subl LITERAL(8), %esp // push padding
+ subl LITERAL(8), %esp // push padding
CFI_ADJUST_CFA_OFFSET(8)
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
CFI_ADJUST_CFA_OFFSET(4)
- PUSH eax // pass arg1
+ PUSH eax // pass arg1
call SYMBOL(artResolveStringFromCode)
- addl LITERAL(16), %esp // pop arguments
+ addl LITERAL(16), %esp // pop arguments
CFI_ADJUST_CFA_OFFSET(-16)
- testl %eax, %eax // If result is null, deliver the OOME.
+ testl %eax, %eax // If result is null, deliver the OOME.
jz 1f
CFI_REMEMBER_STATE
RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 72a03eb..39f5a95 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1330,34 +1330,7 @@
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
DEFINE_FUNCTION art_quick_resolve_string
- // Custom calling convention: RAX serves as both input and output.
- PUSH r15
- PUSH r14
- movq 24(%rsp), %r15 // get referrer
- movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d // get declaring class
- movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15 // get string dex cache
- movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d
- andl %eax, %r14d
- movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14
- movl %r14d, %r15d
- shrq LITERAL(32), %r14
- cmpl %r14d, %eax
- jne .Lart_quick_resolve_string_slow_path
- movl %r15d, %eax
- CFI_REMEMBER_STATE
- POP r14
- POP r15
-#ifdef USE_READ_BARRIER
- cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
- ret
- CFI_RESTORE_STATE
- CFI_DEF_CFA(rsp, 24) // workaround for clang bug: 31975598
-
-// Slow path, the index did not match.
-.Lart_quick_resolve_string_slow_path:
- SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+ SETUP_SAVE_EVERYTHING_FRAME
// Outgoing argument set up
movl %eax, %edi // pass string index
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()