When entering nterp, take a fast path for instance calls with 1 argument.
Such methods only take 'this' as an argument, so we don't need to fetch
the shorty to set up the arguments.
The same optimization can be applied to nterp->compiled calls as a
follow-up, by checking that the next instruction after the invoke is not
move-result(-wide).
Test: test.py
Bug: 112676029
Change-Id: Ibc7b4d4ca1c636f4ad6572484e0990ccdbd63293
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S
index b6d9db6..a977a90 100644
--- a/runtime/interpreter/mterp/arm64ng/main.S
+++ b/runtime/interpreter/mterp/arm64ng/main.S
@@ -383,7 +383,7 @@
//
// Outputs
// - ip contains the dex registers size
-// - x13 contains the old stack pointer.
+// - x28 contains the old stack pointer.
// - \code_item is replaced with a pointer to the instructions
// - if load_ins is 1, w15 contains the ins
//
@@ -410,10 +410,10 @@
add \fp, \refs, ip, lsl #2
// Now setup the stack pointer.
- mov x13, sp
- .cfi_def_cfa_register x13
+ mov x28, sp
+ .cfi_def_cfa_register x28
mov sp, x14
- str x13, [\refs, #-8]
+ str x28, [\refs, #-8]
CFI_DEF_CFA_BREG_PLUS_UCONST \cfi_refs, -8, CALLEE_SAVES_SIZE
// Put nulls in reference frame.
@@ -483,14 +483,10 @@
.endm
.macro SPILL_ALL_ARGUMENTS
- INCREASE_FRAME 128
- // GP arguments.
- SAVE_TWO_REGS x0, x1, 0
- SAVE_TWO_REGS x2, x3, 16
- SAVE_TWO_REGS x4, x5, 32
- SAVE_TWO_REGS x6, x7, 48
-
- // FP arguments
+ stp x0, x1, [sp, #-128]!
+ stp x2, x3, [sp, #16]
+ stp x4, x5, [sp, #32]
+ stp x6, x7, [sp, #48]
stp d0, d1, [sp, #64]
stp d2, d3, [sp, #80]
stp d4, d5, [sp, #96]
@@ -498,18 +494,14 @@
.endm
.macro RESTORE_ALL_ARGUMENTS
- // GP arguments.
- RESTORE_TWO_REGS x0, x1, 0
- RESTORE_TWO_REGS x2, x3, 16
- RESTORE_TWO_REGS x4, x5, 32
- RESTORE_TWO_REGS x6, x7, 48
-
- // FP arguments
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
ldp d0, d1, [sp, #64]
ldp d2, d3, [sp, #80]
ldp d4, d5, [sp, #96]
ldp d6, d7, [sp, #112]
- DECREASE_FRAME 128
+ ldp x0, x1, [sp], #128
.endm
// Helper to setup the stack after doing a nterp to nterp call. This will setup:
@@ -1356,16 +1348,7 @@
/* Spill callee save regs */
SPILL_ALL_CALLEE_SAVES
- // TODO: Get shorty in a better way and remove below
- SPILL_ALL_ARGUMENTS
-
- bl NterpGetShorty
- // Save shorty in callee-save xIBASE.
- mov xIBASE, x0
-
- RESTORE_ALL_ARGUMENTS
ldr xPC, [x0, #ART_METHOD_DATA_OFFSET_64]
-
// Setup the stack for executing the method.
SETUP_STACK_FRAME xPC, xREFS, xFP, CFI_REFS, load_ins=1
@@ -1373,24 +1356,37 @@
cbz w15, .Lxmm_setup_finished
sub ip2, ip, x15
- lsl x8, ip2, #2 // x8 is now the offset for inputs into the registers array.
+ ldr w26, [x0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
+ lsl x21, ip2, #2 // x21 is now the offset for inputs into the registers array.
- // Setup shorty, pointer to inputs in FP and pointer to inputs in REFS
- add x9, xIBASE, #1 // shorty + 1 ; ie skip return arg character
- add x10, xFP, x8
- add x11, xREFS, x8
+ // If the method is not static and there is one argument ('this'), we don't need to fetch the
+ // shorty.
+ tbnz w26, #ART_METHOD_IS_STATIC_FLAG_BIT, .Lsetup_with_shorty
+ str w1, [xFP, x21]
+ str w1, [xREFS, x21]
+ cmp w15, #1
+ b.eq .Lxmm_setup_finished
- ldr wip, [x0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
- // TODO: could be TBNZ but we'd need a constant for log2(ART_METHOD_IS_STATIC_FLAG).
- tst wip, #ART_METHOD_IS_STATIC_FLAG
- b.ne .Lhandle_static_method
- str w1, [x10], #4
- str w1, [x11], #4
- add x13, x13, #4
+.Lsetup_with_shorty:
+ // TODO: Get shorty in a better way and remove below
+ SPILL_ALL_ARGUMENTS
+ bl NterpGetShorty
+ // Save shorty in callee-save xIBASE.
+ mov xIBASE, x0
+ RESTORE_ALL_ARGUMENTS
+
+ // Setup pointer to inputs in FP and pointer to inputs in REFS
+ add x10, xFP, x21
+ add x11, xREFS, x21
mov x12, #0
+
+ add x9, xIBASE, #1 // shorty + 1 ; ie skip return arg character
+ tbnz w26, #ART_METHOD_IS_STATIC_FLAG_BIT, .Lhandle_static_method
+ add x10, x10, #4
+ add x11, x11, #4
+ add x28, x28, #4
b .Lcontinue_setup_gprs
.Lhandle_static_method:
- mov x12, #0
LOOP_OVER_SHORTY_STORING_GPRS x1, w1, x9, x12, x10, x11, .Lgpr_setup_finished
.Lcontinue_setup_gprs:
LOOP_OVER_SHORTY_STORING_GPRS x2, w2, x9, x12, x10, x11, .Lgpr_setup_finished
@@ -1399,7 +1395,7 @@
LOOP_OVER_SHORTY_STORING_GPRS x5, w5, x9, x12, x10, x11, .Lgpr_setup_finished
LOOP_OVER_SHORTY_STORING_GPRS x6, w6, x9, x12, x10, x11, .Lgpr_setup_finished
LOOP_OVER_SHORTY_STORING_GPRS x7, w7, x9, x12, x10, x11, .Lgpr_setup_finished
- LOOP_OVER_INTs x9, x12, x10, x11, x13, .Lgpr_setup_finished
+ LOOP_OVER_INTs x9, x12, x10, x11, x28, .Lgpr_setup_finished
.Lgpr_setup_finished:
add x9, xIBASE, #1 // shorty + 1 ; ie skip return arg character
mov x12, #0 // reset counter
@@ -1411,7 +1407,7 @@
LOOP_OVER_SHORTY_STORING_FPS d5, s5, x9, x12, x10, .Lxmm_setup_finished
LOOP_OVER_SHORTY_STORING_FPS d6, s6, x9, x12, x10, .Lxmm_setup_finished
LOOP_OVER_SHORTY_STORING_FPS d7, s7, x9, x12, x10, .Lxmm_setup_finished
- LOOP_OVER_FPs x9, x12, x10, x13, .Lxmm_setup_finished
+ LOOP_OVER_FPs x9, x12, x10, x28, .Lxmm_setup_finished
.Lxmm_setup_finished:
CFI_DEFINE_DEX_PC_WITH_OFFSET(CFI_TMP, CFI_DEX, 0)
diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S
index 7095f58..d2ca06f 100644
--- a/runtime/interpreter/mterp/armng/main.S
+++ b/runtime/interpreter/mterp/armng/main.S
@@ -505,31 +505,6 @@
.endif
.endm
-.macro SPILL_ALL_ARGUMENTS
- // We spill r4 for stack alignment.
- push {r0-r4}
- .cfi_adjust_cfa_offset 20
- .cfi_rel_offset r0, 0
- .cfi_rel_offset r1, 4
- .cfi_rel_offset r2, 8
- .cfi_rel_offset r3, 12
- .cfi_rel_offset r4, 16
- vpush {s0-s15}
- .cfi_adjust_cfa_offset 64
-.endm
-
-.macro RESTORE_ALL_ARGUMENTS
- vpop {s0-s15}
- .cfi_adjust_cfa_offset -64
- pop {r0-r4}
- .cfi_restore r0
- .cfi_restore r1
- .cfi_restore r2
- .cfi_restore r3
- .cfi_restore r4
- .cfi_adjust_cfa_offset -20
-.endm
-
// Helper to setup the stack after doing a nterp to nterp call. This will setup:
// - rNEW_FP: the new pointer to dex registers
// - rNEW_REFS: the new pointer to references
@@ -1396,15 +1371,6 @@
/* Spill callee save regs */
SPILL_ALL_CALLEE_SAVES
- // TODO: Get shorty in a better way and remove below
- SPILL_ALL_ARGUMENTS
-
- bl NterpGetShorty
- // Save shorty in callee-save rIBASE.
- mov rIBASE, r0
-
- RESTORE_ALL_ARGUMENTS
-
ldr rPC, [r0, #ART_METHOD_DATA_OFFSET_32]
// Setup the stack for executing the method.
@@ -1414,36 +1380,54 @@
cmp r4, #0
beq .Lxmm_setup_finished
- sub r4, rINST, r4
- lsl r4, r4, #2 // r4 is now the offset for inputs into the registers array.
+ sub rINST, rINST, r4
+ ldr r8, [r0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
+ lsl rINST, rINST, #2 // rINST is now the offset for inputs into the registers array.
+ mov rIBASE, ip // rIBASE contains the old stack pointer
- mov lr, ip // lr contains the old stack pointer
+ // If the method is not static and there is one argument ('this'), we don't need to fetch the
+ // shorty.
+ tst r8, #ART_METHOD_IS_STATIC_FLAG
+ bne .Lsetup_with_shorty
+ str r1, [rFP, rINST]
+ str r1, [rREFS, rINST]
+ cmp r4, #1
+ beq .Lxmm_setup_finished
- ldr ip, [r0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
- // r0 is now available.
+.Lsetup_with_shorty:
+ // Save arguments that were passed before calling into the runtime.
+ // No need to save r0 (ArtMethod) as we're not using it later in this code.
+ // Save r4 for stack alignment.
+ // TODO: Get shorty in a better way and remove below
+ push {r1-r4}
+ vpush {s0-s15}
+ bl NterpGetShorty
+ vpop {s0-s15}
+ pop {r1-r4}
+
+ mov ip, r8
+ add r8, rREFS, rINST
+ add r7, rFP, rINST
+ mov r4, #0
// Setup shorty, pointer to inputs in FP and pointer to inputs in REFS
- add r0, rIBASE, #1 // shorty + 1 ; ie skip return arg character
- add r7, rFP, r4
- add r8, rREFS, r4
+ add lr, r0, #1 // shorty + 1 ; ie skip return arg character
tst ip, #ART_METHOD_IS_STATIC_FLAG
bne .Lhandle_static_method
- str r1, [r7], #4
- str r1, [r8], #4
- add lr, lr, #4
- mov r4, #0
+ add r7, r7, #4
+ add r8, r8, #4
+ add rIBASE, rIBASE, #4
b .Lcontinue_setup_gprs
.Lhandle_static_method:
- mov r4, #0
- LOOP_OVER_SHORTY_STORING_GPRS r1, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0
+ LOOP_OVER_SHORTY_STORING_GPRS r1, lr, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0
.Lcontinue_setup_gprs:
- LOOP_OVER_SHORTY_STORING_GPRS r2, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0
- LOOP_OVER_SHORTY_STORING_GPRS r3, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=1
+ LOOP_OVER_SHORTY_STORING_GPRS r2, lr, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0
+ LOOP_OVER_SHORTY_STORING_GPRS r3, lr, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=1
.Lif_long:
- LOOP_OVER_INTs r0, r4, r7, r8, lr, ip, r1, .Lgpr_setup_finished
+ LOOP_OVER_INTs lr, r4, r7, r8, rIBASE, ip, r1, .Lgpr_setup_finished
.Lgpr_setup_finished:
- add r0, rIBASE, #1 // shorty + 1 ; ie skip return arg character
+ add r0, r0, #1 // shorty + 1 ; ie skip return arg character
mov r1, r7
- add r2, lr, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK
+ add r2, rIBASE, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK
vpush {s0-s15}
mov r3, sp
bl NterpStoreArm32Fprs
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index 20dc760..02f0c5a 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -1450,17 +1450,40 @@
/* Spill callee save regs */
SPILL_ALL_CALLEE_SAVES
+ movq ART_METHOD_DATA_OFFSET_64(%rdi), rPC
+
+ // Setup the stack for executing the method.
+ SETUP_STACK_FRAME rPC, rREFS, rREFS32, rFP, CFI_REFS, load_ins=1
+
+ // Setup the parameters
+ testl %r14d, %r14d
+ je .Lxmm_setup_finished
+
+ subq %r14, %rbx
+ salq $$2, %rbx // rbx is now the offset for inputs into the registers array.
+
+ // If the method is not static and there is one argument ('this'), we don't need to fetch the
+ // shorty.
+ testl $$ART_METHOD_IS_STATIC_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi)
+ jne .Lsetup_with_shorty
+
+ movl %esi, (rFP, %rbx)
+ movl %esi, (rREFS, %rbx)
+
+ cmpl $$1, %r14d
+ je .Lxmm_setup_finished
+
+.Lsetup_with_shorty:
// TODO: Get shorty in a better way and remove below
- PUSH rdi
- PUSH rsi
- PUSH rdx
- PUSH rcx
- PUSH r8
- PUSH r9
+ push %rdi
+ push %rsi
+ push %rdx
+ push %rcx
+ push %r8
+ push %r9
// Save xmm registers + alignment.
subq MACRO_LITERAL(8 * 8 + 8), %rsp
- CFI_ADJUST_CFA_OFFSET(8 * 8 + 8)
movq %xmm0, 0(%rsp)
movq %xmm1, 8(%rsp)
movq %xmm2, 16(%rsp)
@@ -1484,45 +1507,31 @@
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
addq MACRO_LITERAL(8 * 8 + 8), %rsp
- CFI_ADJUST_CFA_OFFSET(-8 * 8 - 8)
- POP r9
- POP r8
- POP rcx
- POP rdx
- POP rsi
- POP rdi
+ pop %r9
+ pop %r8
+ pop %rcx
+ pop %rdx
+ pop %rsi
+ pop %rdi
+ // Reload the old stack pointer, which used to be stored in %r11, which is not callee-saved.
+ movq -8(rREFS), %r11
// TODO: Get shorty in a better way and remove above
- movq ART_METHOD_DATA_OFFSET_64(%rdi), rPC
-
- // Setup the stack for executing the method.
- SETUP_STACK_FRAME rPC, rREFS, rREFS32, rFP, CFI_REFS, load_ins=1
-
- // Setup the parameters
- testl %r14d, %r14d
- je .Lxmm_setup_finished
-
- subq %r14, %rbx
- salq $$2, %rbx // rbx is now the offset for inputs into the registers array.
-
+ movq $$0, %r14
testl $$ART_METHOD_IS_STATIC_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi)
- // Available: rdi, r10, r14
+ // Available: rdi, r10
// Note the leaq below don't change the flags.
leaq 1(%rbp), %r10 // shorty + 1 ; ie skip return arg character
leaq (rFP, %rbx, 1), %rdi
leaq (rREFS, %rbx, 1), %rbx
jne .Lhandle_static_method
- movl %esi, (%rdi)
- movl %esi, (%rbx)
addq $$4, %rdi
addq $$4, %rbx
addq $$4, %r11
- movq $$0, %r14
jmp .Lcontinue_setup_gprs
.Lhandle_static_method:
- movq $$0, %r14
LOOP_OVER_SHORTY_STORING_GPRS rsi, esi, r10, r14, rdi, rbx, .Lgpr_setup_finished
.Lcontinue_setup_gprs:
LOOP_OVER_SHORTY_STORING_GPRS rdx, edx, r10, r14, rdi, rbx, .Lgpr_setup_finished
diff --git a/tools/cpp-define-generator/art_method.def b/tools/cpp-define-generator/art_method.def
index 7b5606f..c2e18b1 100644
--- a/tools/cpp-define-generator/art_method.def
+++ b/tools/cpp-define-generator/art_method.def
@@ -25,6 +25,8 @@
art::kAccStatic)
ASM_DEFINE(ART_METHOD_IMT_MASK,
art::ImTable::kSizeTruncToPowerOfTwo - 1)
+ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG_BIT,
+ art::MostSignificantBit(art::kAccStatic))
ASM_DEFINE(ART_METHOD_DECLARING_CLASS_OFFSET,
art::ArtMethod::DeclaringClassOffset().Int32Value())
ASM_DEFINE(ART_METHOD_JNI_OFFSET_32,