nterp: Re-order where stack alignment happens in an nterp frame.
Bug: 112676029
Test: test.py
Change-Id: Ie84902723ef25802994f844b70173e21f3ac5def
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index f24f9ed..8a445b12 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -266,6 +266,55 @@
jmp 2b
.endm
+// Set up the stack frame to start executing the method. Expects:
+// - rdi to contain the ArtMethod (it is stored at the new stack pointer),
+// - rbx, r10, r11 to be available (all three are overwritten).
+//
+// Outputs:
+// - rbx contains the dex registers size (a register count, not a byte size),
+// - r11 contains the old stack pointer; refs and fp hold the reference and dex register array addresses.
+.macro SETUP_STACK_FRAME code_item, refs, fp, cfi_refs
+ // Fetch dex register size (16-bit count, zero-extended).
+ movzwl CODE_ITEM_REGISTERS_SIZE_OFFSET(\code_item), %ebx
+ // Fetch outs size; refs only holds it temporarily, until the reference array address is computed.
+ movzwq CODE_ITEM_OUTS_SIZE_OFFSET(\code_item), \refs
+
+ // Compute required size in bytes for dex registers, references and outs: ((2 * ebx) + outs) * 4
+ leaq (\refs, %rbx, 2), %r11
+ salq $$2, %r11
+
+ // Compute new stack pointer in r10: add 24 for saving the previous frame,
+ // pc, and method being executed.
+ leaq -24(%rsp), %r10
+ subq %r11, %r10
+ // Alignment: round the new stack pointer down to a multiple of 16.
+ andq $$-16, %r10
+
+ // Set reference and dex registers: refs is 24 bytes plus the outs area above r10, fp follows the references.
+ leaq 24(%r10, \refs, 4), \refs
+ leaq (\refs, %rbx, 4), \fp
+
+ // Now setup the stack pointer, keeping the old value in r11 (used as the CFA register meanwhile).
+ movq %rsp, %r11
+ CFI_DEF_CFA_REGISTER(r11)
+ movq %r10, %rsp
+ movq %r11, -8(\refs) // Save the previous frame in the slot just below the references.
+ CFI_DEFINE_CFA_DEREF(\cfi_refs, -8, (6 + 4 + 1) * 8)
+
+ // Put nulls in reference frame (skipped when the method has no dex registers).
+ testl %ebx, %ebx
+ je 2f
+ movq \refs, %r10 // r10 is free again: use it as the cursor over the references.
+1:
+ movl $$0, (%r10)
+ addq $$4, %r10
+ cmpq %r10, \fp
+ jne 1b
+2:
+ // Save the ArtMethod at the new stack pointer.
+ movq %rdi, (%rsp)
+.endm
+
// Puts the next floating point argument into the expected register,
// fetching values based on a non-range invoke.
// Uses rax as temporary.
@@ -740,54 +789,10 @@
// are called by compiled code or nterp.
SPILL_ALL_CALLEE_SAVES
- movq %rsp, %r11
- CFI_DEF_CFA_REGISTER(r11)
-
- // From this point:
- // - rax contains code item
- // - rdi contains method
- // - r11 contains saved stack pointer.
-
- // Create space for registers * 2. Set rFP and rRefs.
- movzwl CODE_ITEM_REGISTERS_SIZE_OFFSET(%rax), %ecx
- sall MACRO_LITERAL(2), %ecx
- subq %rcx, %rsp
- movq %rsp, rNEW_FP
- subq %rcx, %rsp
- movq %rsp, rNEW_REFS
-
- // Put nulls in reference frame.
- testl %ecx, %ecx
- je 2f
- movq rNEW_REFS, %rcx
-1:
- movl MACRO_LITERAL(0), (%rcx)
- addq MACRO_LITERAL(4), %rcx
- cmpq %rcx, rNEW_FP
- jne 1b
-2:
- // Create space for the previous frame, saved dex pc, and method being called
- subq MACRO_LITERAL(24), %rsp
-
- // TODO: We could get rid of the two lines below if we preserve r11 until we copy
- // rNEW_REFS to rREFS. (We currently do because we use it for copying parameters.
- // We should move the alignment and rewrite the parameter copy so that we do not
- // need r11 for that and still preserve r11.)
- //
- // Save the previous frame.
- movq %r11, -8(rNEW_REFS)
- CFI_DEFINE_CFA_DEREF(CFI_NEW_REFS, -8, (6 + 4 + 1) * 8)
-
- // Take space for outs.
- movzwl CODE_ITEM_OUTS_SIZE_OFFSET(%rax), %ecx
- sall MACRO_LITERAL(2), %ecx
- subq %rcx, %rsp
-
- // Align stack pointer to 16.
- andq MACRO_LITERAL(-16), %rsp
-
- // Save the ArtMethod.
- movq %rdi, (%rsp)
+ // Setup the frame.
+ SETUP_STACK_FRAME %rax, rNEW_REFS, rNEW_FP, CFI_NEW_REFS
+ // Make r11 point to the top of the dex register array.
+ leaq (rNEW_FP, %rbx, 4), %r11
// Fetch instruction information before replacing rPC.
movzbl 1(rPC), %edi
@@ -1384,7 +1389,7 @@
call SYMBOL(NterpGetCodeItem)
movq %rax, rPC
- // Restore xmm registers _ alignment.
+ // Restore xmm registers + alignment.
movq 0(%rsp), %xmm0
movq 8(%rsp), %xmm1
movq 16(%rsp), %xmm2
@@ -1404,55 +1409,20 @@
POP rdi
// TODO: Get shorty in a better way and remove above
- movq %rsp, %r14 // Save stack pointer
- CFI_DEF_CFA_REGISTER(r14)
-
- // Create space for registers * 2. Set rFP and rRefs.
- movzwl CODE_ITEM_REGISTERS_SIZE_OFFSET(rPC), %ebx
- sall $$2, %ebx
- subq %rbx, %rsp
- movq %rsp, rFP
- subq %rbx, %rsp
- movq %rsp, rREFS
- // Put nulls in reference frame.
- testl %ebx, %ebx
- je .Ldone_clearing_references
- movq rREFS, %r11
-.Lclear_references:
- movl $$0, (%r11)
- addq $$4, %r11
- cmpq %r11, rFP
- jne .Lclear_references
-.Ldone_clearing_references:
-
- // Create space for the previous frame, saved pc, and method being called
- subq $$24, %rsp
-
- // Save the previous frame.
- movq %r14, -8(rREFS)
- CFI_DEFINE_CFA_DEREF(CFI_REFS, -8, (6 + 4 + 1) * 8)
-
- // Take space for outs.
- movzwl CODE_ITEM_OUTS_SIZE_OFFSET(rPC), %r11d
- sall $$2, %r11d
- subq %r11, %rsp
-
- // Align stack pointer to 16.
- andq $$-16, %rsp
-
- // Save the ArtMethod.
- movq %rdi, (%rsp)
+ // Setup the stack for executing the method.
+ SETUP_STACK_FRAME rPC, rREFS, rFP, CFI_REFS
// Setup the parameters
- movzwl CODE_ITEM_INS_SIZE_OFFSET(rPC), %r11d
- testl %r11d, %r11d
+ movzwl CODE_ITEM_INS_SIZE_OFFSET(rPC), %r14d
+ testl %r14d, %r14d
je .Lxmm_setup_finished
- sall $$2, %r11d
- subq %r11, %rbx // rbx is now the offset for inputs into the registers array.
+ subq %r14, %rbx
+ salq $$2, %rbx // rbx is now the offset for inputs into the registers array.
- // Available r11, rbx, rdi, r10
testl $$ART_METHOD_IS_STATIC_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi)
+
+ // Available: rdi, r10, r14
// Note the leaq below don't change the flags.
leaq 1(%rbp), %r10 // shorty + 1 ; ie skip return arg character
leaq (rFP, %rbx, 1), %rdi
@@ -1462,30 +1432,30 @@
movl %esi, (%rbx)
addq $$4, %rdi
addq $$4, %rbx
- addq $$4, %r14
- movq $$0, %r11
+ addq $$4, %r11
+ movq $$0, %r14
jmp .Lcontinue_setup_gprs
.Lhandle_static_method:
- movq $$0, %r11
- LOOP_OVER_SHORTY_STORING_GPRS rsi, esi, r10, r11, rdi, rbx, .Lgpr_setup_finished
+ movq $$0, %r14
+ LOOP_OVER_SHORTY_STORING_GPRS rsi, esi, r10, r14, rdi, rbx, .Lgpr_setup_finished
.Lcontinue_setup_gprs:
- LOOP_OVER_SHORTY_STORING_GPRS rdx, edx, r10, r11, rdi, rbx, .Lgpr_setup_finished
- LOOP_OVER_SHORTY_STORING_GPRS rcx, ecx, r10, r11, rdi, rbx, .Lgpr_setup_finished
- LOOP_OVER_SHORTY_STORING_GPRS r8, r8d, r10, r11, rdi, rbx, .Lgpr_setup_finished
- LOOP_OVER_SHORTY_STORING_GPRS r9, r9d, r10, r11, rdi, rbx, .Lgpr_setup_finished
- LOOP_OVER_INTs r10, r11, rdi, rbx, r14, .Lgpr_setup_finished
+ LOOP_OVER_SHORTY_STORING_GPRS rdx, edx, r10, r14, rdi, rbx, .Lgpr_setup_finished
+ LOOP_OVER_SHORTY_STORING_GPRS rcx, ecx, r10, r14, rdi, rbx, .Lgpr_setup_finished
+ LOOP_OVER_SHORTY_STORING_GPRS r8, r8d, r10, r14, rdi, rbx, .Lgpr_setup_finished
+ LOOP_OVER_SHORTY_STORING_GPRS r9, r9d, r10, r14, rdi, rbx, .Lgpr_setup_finished
+ LOOP_OVER_INTs r10, r14, rdi, rbx, r11, .Lgpr_setup_finished
.Lgpr_setup_finished:
leaq 1(%rbp), %r10 // shorty + 1 ; ie skip return arg character
- movq $$0, %r11 // reset counter
- LOOP_OVER_SHORTY_STORING_XMMS xmm0, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_SHORTY_STORING_XMMS xmm1, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_SHORTY_STORING_XMMS xmm2, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_SHORTY_STORING_XMMS xmm3, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_SHORTY_STORING_XMMS xmm4, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_SHORTY_STORING_XMMS xmm5, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_SHORTY_STORING_XMMS xmm6, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_SHORTY_STORING_XMMS xmm7, r10, r11, rdi, .Lxmm_setup_finished
- LOOP_OVER_FPs r10, r11, rdi, r14, .Lxmm_setup_finished
+ movq $$0, %r14 // reset counter
+ LOOP_OVER_SHORTY_STORING_XMMS xmm0, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_SHORTY_STORING_XMMS xmm1, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_SHORTY_STORING_XMMS xmm2, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_SHORTY_STORING_XMMS xmm3, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_SHORTY_STORING_XMMS xmm4, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_SHORTY_STORING_XMMS xmm5, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_SHORTY_STORING_XMMS xmm6, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_SHORTY_STORING_XMMS xmm7, r10, r14, rdi, .Lxmm_setup_finished
+ LOOP_OVER_FPs r10, r14, rdi, r11, .Lxmm_setup_finished
.Lxmm_setup_finished:
// Set the dex pc pointer.
addq $$CODE_ITEM_INSNS_OFFSET, rPC
diff --git a/runtime/nterp_helpers.cc b/runtime/nterp_helpers.cc
index 6522116..a2ec882 100644
--- a/runtime/nterp_helpers.cc
+++ b/runtime/nterp_helpers.cc
@@ -43,6 +43,8 @@
* | registers | On x86 and x64 this includes the return address,
* | | already spilled on entry.
* ----------------
+ * | alignment | Stack alignment of kStackAlignment.
+ * ----------------
* | | Contains `registers_size` entries (of size 4) from
* | dex | the code item information of the method.
* | registers |
@@ -57,8 +59,6 @@
* ---------------- registers array for easy access from nterp when returning.
* | dex_pc_ptr | Pointer to the dex instruction being executed.
* ---------------- Stored whenever nterp goes into the runtime.
- * | alignment | Stack aligment of kStackAlignment. TODO: try to move
- * ---------------- this below the callee-save registers.
* | | In case nterp calls compiled code, we reserve space
* | out | for out registers. This space will be used for
* | registers | arguments passed on stack.
@@ -118,25 +118,29 @@
uintptr_t NterpGetRegistersArray(ArtMethod** frame) {
CodeItemDataAccessor accessor((*frame)->DexInstructionData());
const uint16_t num_regs = accessor.RegistersSize();
- // The registers array is just below the frame entry.
- return reinterpret_cast<uintptr_t>(frame) + NterpGetFrameSize(*frame) -
- NterpGetFrameEntrySize() -
- (num_regs * kVRegSize);
+ // The registers array is just above the reference array: skip its `num_regs` entries of kVRegSize bytes.
+ return NterpGetReferenceArray(frame) + (num_regs * kVRegSize);
}
uintptr_t NterpGetReferenceArray(ArtMethod** frame) {
CodeItemDataAccessor accessor((*frame)->DexInstructionData());
- const uint16_t num_regs = accessor.RegistersSize();
- // The references array is just below the registers array.
- return NterpGetRegistersArray(frame) - (num_regs * kVRegSize);
+ const uint16_t out_regs = accessor.OutsSize();
+ // The references array is just above the saved previous frame: skip every slot stored below it.
+ return reinterpret_cast<uintptr_t>(frame) +
+ kPointerSize + // method
+ (out_regs * kVRegSize) + // out arguments
+ kPointerSize + // saved dex pc
+ kPointerSize; // previous frame.
}
uint32_t NterpGetDexPC(ArtMethod** frame) {
- uintptr_t dex_pc_ptr = NterpGetReferenceArray(frame) -
- kPointerSize - // saved previous frame
- kPointerSize; // saved dex pc
- CodeItemInstructionAccessor accessor((*frame)->DexInstructions());
- return *reinterpret_cast<const uint16_t**>(dex_pc_ptr) - accessor.Insns();
+ CodeItemDataAccessor accessor((*frame)->DexInstructionData());
+ const uint16_t out_regs = accessor.OutsSize();
+ uintptr_t dex_pc_ptr = reinterpret_cast<uintptr_t>(frame) +
+ kPointerSize + // method
+ (out_regs * kVRegSize); // out arguments; the saved dex pc slot comes right after them
+ CodeItemInstructionAccessor instructions((*frame)->DexInstructions());
+ return *reinterpret_cast<const uint16_t**>(dex_pc_ptr) - instructions.Insns(); // distance in 16-bit units from Insns()
}
uint32_t NterpGetVReg(ArtMethod** frame, uint16_t vreg) {