Remove the fast path in art_quick_resolve_string.

The stub is now used only in very rare cases, so its inline dex cache
lookup is no longer worth keeping. A follow-up change will remove the
string dex cache from java.lang.Class.

No performance regression on x86/x86_64/arm/arm64.
Some performance regression is expected on mips (which always calls
the stub).

Test: test-art-host test-art-target

Change-Id: I1f410924ef2f7d82eb3b39b4d52c283825306d2a
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 550f8c7..239fbad 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1109,62 +1109,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    push   {r10-r12, lr}
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset r10, 0
-    .cfi_rel_offset r11, 4
-    .cfi_rel_offset ip, 8
-    .cfi_rel_offset lr, 12
-    ldr    r10, [sp, #16]                                        @ load referrer
-    ldr    r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET]        @ load declaring class
-    ldr    r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
-    ubfx   r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS
-    add    r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
-    ldrd   r10, r11, [r10]                               @ load index into r11 and pointer into r10
-    cmp    r0, r11
-    bne    .Lart_quick_resolve_string_slow_path
-#ifdef USE_READ_BARRIER
-    ldr    r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   r0, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    mov    r0, r10
-    pop    {r10-r12, pc}
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr    r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    lsrs   r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1)
-    bcs    .Lart_quick_resolve_string_no_rb
-    mov    r0, r10
-    .cfi_remember_state
-    pop    {r10-r12, lr}
-    .cfi_adjust_cfa_offset -16
-    .cfi_restore r10
-    .cfi_restore r11
-    .cfi_restore r12
-    .cfi_restore lr
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not.
-    b      .Lslow_rb_art_quick_read_barrier_mark_reg00  @ Get the marked string back.
-    .cfi_restore_state
-#endif
-
-// Slow path case, the index did not match
-.Lart_quick_resolve_string_slow_path:
-    push {r0-r9}                  @ 10 words of callee saves and args; {r10-r12, lr} already saved.
-    .cfi_adjust_cfa_offset 40
-    .cfi_rel_offset r0, 0
-    .cfi_rel_offset r1, 4
-    .cfi_rel_offset r2, 8
-    .cfi_rel_offset r3, 12
-    .cfi_rel_offset r4, 16
-    .cfi_rel_offset r5, 20
-    .cfi_rel_offset r6, 24
-    .cfi_rel_offset r7, 28
-    .cfi_rel_offset r8, 32
-    .cfi_rel_offset r9, 36
-    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1   @ save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME r1                   @ save everything in case of GC
     mov    r1, r9                                    @ pass Thread::Current
     bl     artResolveStringFromCode                  @ (uint32_t type_idx, Thread*)
     cbz    r0, 1f                                    @ If result is null, deliver the OOME.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d8ebe26..a7a3dde 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1651,44 +1651,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ldr   x29, [sp, #(2 * __SIZEOF_POINTER__)]                   // load referrer
-    ldr   w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET]         // load declaring class
-    ldr   x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]  // load string dex cache
-    ubfx  lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS                // get masked string index into LR
-    ldr   x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x29
-    cmp   x0, x29, lsr #32                                       // compare against upper 32 bits
-    bne   .Lart_quick_resolve_string_slow_path
-    ubfx  x0, x29, #0, #32                                       // extract lower 32 bits into x0
-#ifdef USE_READ_BARRIER
-    // Most common case: GC is not marking.
-    ldr    w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x29, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ret
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr   x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbnz  x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not.
-    b     .Lslow_rb_art_quick_read_barrier_mark_reg00  // Get the marked string back.
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-#endif
-
-// Slow path case, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)
-    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR  // save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME                     // save everything for stack crawl
     mov   x1, xSELF                                 // pass Thread::Current
     bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
     cbz   w0, 1f                                    // If result is null, deliver the OOME.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 635bfa3..46a6cdb 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1151,51 +1151,17 @@
 END_FUNCTION art_quick_alloc_object_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    PUSH edi
-    PUSH esi
-    // Save xmm0 at an aligned address on the stack.
-    subl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(12)
-    movsd %xmm0, 0(%esp)
-    movl 24(%esp), %edi                                          // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi           // get declaring class
-    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi    // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi
-    andl %eax, %esi
-    movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0    // load string idx and ptr to xmm0
-    movd %xmm0, %edi                                             // extract pointer
-    pshufd LITERAL(0x55), %xmm0, %xmm0                           // shuffle index into lowest bits
-    movd %xmm0, %esi                                             // extract index
-    // Restore xmm0 and remove it together with padding from the stack.
-    movsd 0(%esp), %xmm0
-    addl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(-12)
-    cmp %esi, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %edi, %eax
-    CFI_REMEMBER_STATE
-    POP esi
-    POP edi
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(esp, 24)                          // workaround for clang bug: 31975598
-
-.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
     // Outgoing argument set up
-    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx
-    subl LITERAL(8), %esp                                        // push padding
+    subl LITERAL(8), %esp                                 // push padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET                                 // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                          // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                                                     // pass arg1
+    PUSH eax                                              // pass arg1
     call SYMBOL(artResolveStringFromCode)
-    addl LITERAL(16), %esp                                       // pop arguments
+    addl LITERAL(16), %esp                                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    testl %eax, %eax                                        // If result is null, deliver the OOME.
+    testl %eax, %eax                                      // If result is null, deliver the OOME.
     jz 1f
     CFI_REMEMBER_STATE
     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 72a03eb..39f5a95 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1330,34 +1330,7 @@
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    // Custom calling convention: RAX serves as both input and output.
-    PUSH r15
-    PUSH r14
-    movq 24(%rsp), %r15                                         // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d         // get declaring class
-    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15  // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d
-    andl %eax, %r14d
-    movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14
-    movl %r14d, %r15d
-    shrq LITERAL(32), %r14
-    cmpl %r14d, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %r15d, %eax
-    CFI_REMEMBER_STATE
-    POP r14
-    POP r15
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(rsp, 24)                        // workaround for clang bug: 31975598
-
-// Slow path, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+    SETUP_SAVE_EVERYTHING_FRAME
     // Outgoing argument set up
     movl %eax, %edi                             // pass string index
     movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()