ARM64: Simplify save/restore regs in invoke stub.
Save/restore fewer registers and use common macros to do so.
Rewrite the return sequence to avoid many chained branches.
Also make a few other minor simplifications.
Test: Pixel 2 XL boots.
Test: testrunner.py --target --64 --optimizing
Change-Id: I32ee7bad685b8bd73d07e5a4c48a6ac0b22ff762
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 09fc2c2..375b050 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -613,56 +613,18 @@
.macro INVOKE_STUB_CREATE_FRAME
+SAVE_SIZE=6*8 // x4, x5, x19, x20, FP, LR saved.
+ SAVE_TWO_REGS_INCREASE_FRAME x4, x5, SAVE_SIZE
+ SAVE_TWO_REGS x19, x20, 16
+ SAVE_TWO_REGS xFP, xLR, 32
-SAVE_SIZE=15*8 // x4, x5, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
-SAVE_SIZE_AND_METHOD=SAVE_SIZE+8
+ mov xFP, sp // Use xFP for frame pointer, as it's callee-saved.
+ .cfi_def_cfa_register xFP
+ add x10, x2, #(__SIZEOF_POINTER__ + 0xf) // Reserve space for ArtMethod*, arguments and
+ and x10, x10, # ~0xf // round up for 16-byte stack alignment.
+ sub sp, sp, x10 // Adjust SP for ArtMethod*, args and alignment padding.
- mov x9, sp // Save stack pointer.
- .cfi_register sp,x9
-
- add x10, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame.
- sub x10, sp, x10 // Calculate SP position - saves + ArtMethod* + args
- and x10, x10, # ~0xf // Enforce 16 byte stack alignment.
- mov sp, x10 // Set new SP.
-
- sub x10, x9, #SAVE_SIZE // Calculate new FP (later). Done here as we must move SP
- .cfi_def_cfa_register x10 // before this.
- .cfi_adjust_cfa_offset SAVE_SIZE
-
- str x28, [x10, #112]
- .cfi_rel_offset x28, 112
-
- stp x26, x27, [x10, #96]
- .cfi_rel_offset x26, 96
- .cfi_rel_offset x27, 104
-
- stp x24, x25, [x10, #80]
- .cfi_rel_offset x24, 80
- .cfi_rel_offset x25, 88
-
- stp x22, x23, [x10, #64]
- .cfi_rel_offset x22, 64
- .cfi_rel_offset x23, 72
-
- stp x20, x21, [x10, #48]
- .cfi_rel_offset x20, 48
- .cfi_rel_offset x21, 56
-
- stp x9, x19, [x10, #32] // Save old stack pointer and x19.
- .cfi_rel_offset sp, 32
- .cfi_rel_offset x19, 40
-
- stp x4, x5, [x10, #16] // Save result and shorty addresses.
- .cfi_rel_offset x4, 16
- .cfi_rel_offset x5, 24
-
- stp xFP, xLR, [x10] // Store LR & FP.
- .cfi_rel_offset x29, 0
- .cfi_rel_offset x30, 8
-
- mov xFP, x10 // Use xFP now, as it's callee-saved.
- .cfi_def_cfa_register x29
mov xSELF, x3 // Move thread pointer into SELF register.
// Copy arguments into stack frame.
@@ -677,12 +639,10 @@
// Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
// does not have unique-id variables.
1:
- cmp w2, #0
- beq 2f
+ cbz w2, 2f
sub w2, w2, #4 // Need 65536 bytes of range.
ldr w10, [x1, x2]
str w10, [x9, x2]
-
b 1b
2:
@@ -699,29 +659,14 @@
// Branch to method.
blr x9
- // Restore return value address and shorty address.
- ldp x4, x5, [xFP, #16]
- .cfi_restore x4
- .cfi_restore x5
+ // Pop the ArtMethod* (null), arguments and alignment padding from the stack.
+ mov sp, xFP
+ .cfi_def_cfa_register sp
- ldr x28, [xFP, #112]
- .cfi_restore x28
-
- ldp x26, x27, [xFP, #96]
- .cfi_restore x26
- .cfi_restore x27
-
- ldp x24, x25, [xFP, #80]
- .cfi_restore x24
- .cfi_restore x25
-
- ldp x22, x23, [xFP, #64]
- .cfi_restore x22
- .cfi_restore x23
-
- ldp x20, x21, [xFP, #48]
- .cfi_restore x20
- .cfi_restore x21
+ // Restore saved registers including value address and shorty address.
+ RESTORE_TWO_REGS x19, x20, 16
+ RESTORE_TWO_REGS xFP, xLR, 32
+ RESTORE_TWO_REGS_DECREASE_FRAME x4, x5, SAVE_SIZE
// Store result (w0/x0/s0/d0) appropriately, depending on resultType.
ldrb w10, [x5]
@@ -731,33 +676,28 @@
// Don't set anything for a void type.
cmp w10, #'V'
- beq 3f
+ beq 1f
// Is it a double?
cmp w10, #'D'
- bne 1f
- str d0, [x4]
- b 3f
+ beq 2f
-1: // Is it a float?
+ // Is it a float?
cmp w10, #'F'
- bne 2f
- str s0, [x4]
- b 3f
+ beq 3f
-2: // Just store x0. Doesn't matter if it is 64 or 32 bits.
+ // Just store x0. Doesn't matter if it is 64 or 32 bits.
str x0, [x4]
-3: // Finish up.
- ldp x2, x19, [xFP, #32] // Restore stack pointer and x19.
- .cfi_restore x19
- mov sp, x2
- .cfi_restore sp
+1: // Finish up.
+ ret
- ldp xFP, xLR, [xFP] // Restore old frame pointer and link register.
- .cfi_restore x29
- .cfi_restore x30
+2: // Store double.
+ str d0, [x4]
+ ret
+3: // Store float.
+ str s0, [x4]
ret
.endm
@@ -1056,7 +996,7 @@
/* extern"C" void art_quick_osr_stub(void** stack, x0
* size_t stack_size_in_bytes, x1
- * const uin8_t* native_pc, x2
+ * const uint8_t* native_pc, x2
* JValue *result, x3
* char *shorty, x4
* Thread *self) x5