Implement on-stack replacement for MIPS32 and MIPS64
Change-Id: I4e589f0597b597adff95e1289f20deb2eab97e9b
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index fd1851f..3c0e452 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -319,6 +319,111 @@
.endm
/*
+ * On stack replacement stub.
+ * On entry:
+ * a0 = stack to copy
+ * a1 = size of stack
+ * a2 = pc to call
+ * a3 = JValue* result
+ * [sp + 16] = shorty
+ * [sp + 20] = thread
+ */
+ENTRY art_quick_osr_stub
+ // Allocate a 48-byte frame and save callee general purpose registers (S0-S7, S8), RA and GP.
+ addiu $sp, $sp, -48 # Allocate the 48-byte register save area
+ .cfi_adjust_cfa_offset 48
+ sw $ra, 44($sp)
+ .cfi_rel_offset 31, 44
+ sw $s8, 40($sp)
+ .cfi_rel_offset 30, 40
+ sw $gp, 36($sp)
+ .cfi_rel_offset 28, 36
+ sw $s7, 32($sp)
+ .cfi_rel_offset 23, 32
+ sw $s6, 28($sp)
+ .cfi_rel_offset 22, 28
+ sw $s5, 24($sp)
+ .cfi_rel_offset 21, 24
+ sw $s4, 20($sp)
+ .cfi_rel_offset 20, 20
+ sw $s3, 16($sp)
+ .cfi_rel_offset 19, 16
+ sw $s2, 12($sp)
+ .cfi_rel_offset 18, 12
+ sw $s1, 8($sp)
+ .cfi_rel_offset 17, 8
+ sw $s0, 4($sp) # Lowest saved register; the word at 0($sp) is not written
+ .cfi_rel_offset 16, 4
+
+ move $s8, $sp # Save the stack pointer (base of the register save area)
+ move $s7, $a1 # Save size of stack; used at .Losr_entry to locate the RA slot
+ move $s6, $a2 # Save the pc to call; jumped to from .Losr_entry
+ lw rSELF, 48+20($sp) # Save managed thread pointer (entry [sp + 20]) into rSELF
+ addiu $t0, $sp, -12 # Reserve 12 bytes of space for stack pointer,
+ # JValue* result, and ArtMethod* slot.
+ srl $t0, $t0, 4 # Align stack pointer to 16 bytes (shift out the low 4 bits)
+ sll $sp, $t0, 4 # Update stack pointer (aligned down, so at least 12 bytes are reserved)
+ sw $s8, 4($sp) # Save old stack pointer (reloaded after the call returns)
+ sw $a3, 8($sp) # Save JValue* result (reloaded after the call returns)
+ sw $zero, 0($sp) # Store null for ArtMethod* at bottom of frame
+ subu $sp, $a1 # Reserve space for callee stack (a1 = size of stack)
+ move $a2, $a1 # memcpy bytes = size of stack
+ move $a1, $a0 # memcpy src = stack to copy
+ move $a0, $sp # memcpy dest = the space just reserved
+ la $t9, memcpy # t9 = address of memcpy
+ jalr $t9 # memcpy (dest a0, src a1, bytes a2)
+ addiu $sp, $sp, -16 # Delay slot: make space for argument slots for memcpy. NOTE(review): delay-slot use implies .set noreorder, which is not visible in this hunk
+ bal .Losr_entry # Call the method via .Losr_entry, which stores this RA into the copied stack
+ addiu $sp, $sp, 16 # Delay slot: restore stack after memcpy
+ lw $a2, 8($sp) # Restore JValue* result (expects sp to be back at the ArtMethod* slot here)
+ lw $sp, 4($sp) # Restore saved stack pointer (base of the register save area)
+ lw $a0, 48+16($sp) # load shorty (entry [sp + 16])
+ lbu $a0, 0($a0) # load return type (first byte of shorty)
+ li $a1, 'D' # put char 'D' into a1
+ beq $a0, $a1, .Losr_fp_result # Test if result type char == 'D'
+ li $a1, 'F' # Delay slot: put char 'F' into a1
+ beq $a0, $a1, .Losr_fp_result # Test if result type char == 'F'
+ nop # Delay slot: nothing to do
+ sw $v0, 0($a2) # Non-FP result: store v0 into the first word of result
+ b .Losr_exit
+ sw $v1, 4($a2) # Delay slot: store v1 into the second word; v0/v1 are now in result
+.Losr_fp_result:
+ SDu $f0, $f1, 0, $a2, $t0 # store f0/f1 into result (SDu is defined outside this hunk; t0 is presumably its scratch register)
+.Losr_exit:
+ lw $ra, 44($sp) # Reload every register saved on entry from the same offsets
+ .cfi_restore 31
+ lw $s8, 40($sp)
+ .cfi_restore 30
+ lw $gp, 36($sp)
+ .cfi_restore 28
+ lw $s7, 32($sp)
+ .cfi_restore 23
+ lw $s6, 28($sp)
+ .cfi_restore 22
+ lw $s5, 24($sp)
+ .cfi_restore 21
+ lw $s4, 20($sp)
+ .cfi_restore 20
+ lw $s3, 16($sp)
+ .cfi_restore 19
+ lw $s2, 12($sp)
+ .cfi_restore 18
+ lw $s1, 8($sp)
+ .cfi_restore 17
+ lw $s0, 4($sp)
+ .cfi_restore 16
+ jalr $zero, $ra # Return to the caller (no link register is written)
+ addiu $sp, $sp, 48 # Delay slot: release the 48-byte register save area
+ .cfi_adjust_cfa_offset -48
+.Losr_entry:
+ addiu $s7, $s7, -4 # s7 = size of stack - 4 (offset of the top word)
+ addu $t0, $s7, $sp # t0 = address of the top word of the copied stack
+ move $t9, $s6 # t9 = pc to call
+ jalr $zero, $t9 # Jump to the pc to call without linking; RA still holds the bal return address
+ sw $ra, 0($t0) # Delay slot: Store RA per the compiler ABI
+END art_quick_osr_stub
+
+ /*
* On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
* FIXME: just guessing about the shape of the jmpbuf. Where will pc be?
*/
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index f1e605a..f31b92a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -358,6 +358,138 @@
.endm
/*
+ * On stack replacement stub.
+ * On entry:
+ * a0 = stack to copy
+ * a1 = size of stack
+ * a2 = pc to call
+ * a3 = JValue* result
+ * a4 = shorty
+ * a5 = thread
+ */
+ENTRY art_quick_osr_stub
+ move $t0, $sp # save stack pointer in t0 (stored at 88($sp) below)
+ daddiu $t1, $sp, -112 # reserve stack space (14 slots x 8 bytes)
+ dsrl $t1, $t1, 4 # enforce 16 byte stack alignment (shift out the low 4 bits)
+ dsll $sp, $t1, 4 # update stack pointer. NOTE(review): no CFA directive accompanies this sp change in the visible code - confirm unwinding
+
+ // Save callee general purpose registers, SP, T8(GP), RA, A3, and A4 (14 registers x 8 bytes = 112 bytes)
+ sd $ra, 104($sp)
+ .cfi_rel_offset 31, 104
+ sd $s8, 96($sp)
+ .cfi_rel_offset 30, 96
+ sd $t0, 88($sp) # save original stack pointer stored in t0
+ .cfi_rel_offset 29, 88
+ sd $t8, 80($sp) # t8 holds caller's gp, now save it to the stack.
+ .cfi_rel_offset 28, 80 # Value from gp is pushed, so set the cfi offset accordingly.
+ sd $s7, 72($sp)
+ .cfi_rel_offset 23, 72
+ sd $s6, 64($sp)
+ .cfi_rel_offset 22, 64
+ sd $s5, 56($sp)
+ .cfi_rel_offset 21, 56
+ sd $s4, 48($sp)
+ .cfi_rel_offset 20, 48
+ sd $s3, 40($sp)
+ .cfi_rel_offset 19, 40
+ sd $s2, 32($sp)
+ .cfi_rel_offset 18, 32
+ sd $s1, 24($sp)
+ .cfi_rel_offset 17, 24
+ sd $s0, 16($sp)
+ .cfi_rel_offset 16, 16
+ sd $a4, 8($sp) # save shorty address (reloaded after the call returns)
+ .cfi_rel_offset 8, 8
+ sd $a3, 0($sp) # save JValue* result (reloaded after the call returns)
+ .cfi_rel_offset 7, 0
+ move rSELF, $a5 # Save managed thread pointer into rSELF
+
+ daddiu $sp, $sp, -16 # Reserve 16 bytes; the ArtMethod* slot is at 0($sp)
+ jal .Losr_entry # Call .Losr_entry, which copies the stack and jumps to the pc to call
+ sd $zero, 0($sp) # Delay slot: Store null for ArtMethod* at bottom of frame
+ daddiu $sp, $sp, 16 # Release the 16 bytes reserved before the call
+
+ // Restore return value address and shorty address
+ ld $a4, 8($sp) # shorty address
+ .cfi_restore 8
+ ld $a3, 0($sp) # result value address
+ .cfi_restore 7
+
+ lbu $t1, 0($a4) # load return type (first byte of shorty)
+ li $t2, 'D' # put char 'D' into t2
+ beq $t1, $t2, .Losr_fp_result # branch if result type char == 'D'
+ li $t2, 'F' # Delay slot: put char 'F' into t2
+ beq $t1, $t2, .Losr_fp_result # branch if result type char == 'F'
+ nop # Delay slot: nothing to do
+ b .Losr_exit # Non-FP result: v0 already holds the low half
+ dsrl $v1, $v0, 32 # Delay slot: put high half of result in v1
+.Losr_fp_result:
+ mfc1 $v0, $f0 # put low half of FP result in v0
+ mfhc1 $v1, $f0 # put high half of FP result in v1
+.Losr_exit:
+ sw $v0, 0($a3) # store low half of result
+ sw $v1, 4($a3) # store high half of result
+
+ // Restore callee registers from the slots they were saved to on entry
+ ld $ra, 104($sp)
+ .cfi_restore 31
+ ld $s8, 96($sp)
+ .cfi_restore 30
+ ld $t0, 88($sp) # load original SP into t0 for now; moved into sp in the return delay slot
+ .cfi_restore 29
+ ld $t8, 80($sp) # Restore gp back to its temp storage.
+ .cfi_restore 28
+ ld $s7, 72($sp)
+ .cfi_restore 23
+ ld $s6, 64($sp)
+ .cfi_restore 22
+ ld $s5, 56($sp)
+ .cfi_restore 21
+ ld $s4, 48($sp)
+ .cfi_restore 20
+ ld $s3, 40($sp)
+ .cfi_restore 19
+ ld $s2, 32($sp)
+ .cfi_restore 18
+ ld $s1, 24($sp)
+ .cfi_restore 17
+ ld $s0, 16($sp)
+ .cfi_restore 16
+ jalr $zero, $ra # Return to the caller (no link register is written)
+ move $sp, $t0 # Delay slot: restore the original stack pointer
+
+.Losr_entry:
+ dsubu $sp, $sp, $a1 # Reserve space for callee stack
+ daddiu $a1, $a1, -8 # a1 = size of stack minus the 8-byte RA slot
+ daddu $t0, $a1, $sp # t0 = address of the RA slot (top 8 bytes of the callee stack)
+ sw $ra, 0($t0) # Store low half of RA per compiler ABI
+ dsrl $t1, $ra, 32 # t1 = high half of RA
+ sw $t1, 4($t0) # Store high half of RA per compiler ABI
+
+ // Copy arguments into callee stack
+ // Use simple copy routine for now.
+ // 4 bytes per slot, copied from the highest slot down to sp.
+ // a0 = source address
+ // a1 = args length in bytes (does not include 8 bytes for RA)
+ // sp = destination address
+ beqz $a1, .Losr_loop_exit # Skip the copy when the args length is zero
+ daddiu $a1, $a1, -4 # Delay slot: a1 = offset of the last 4-byte slot
+ daddu $t1, $a0, $a1 # t1 = address of the last source slot
+ daddu $t2, $sp, $a1 # t2 = address of the last destination slot
+.Losr_loop_entry:
+ lw $t0, 0($t1) # load one slot from the source
+ daddiu $t1, $t1, -4 # step the source pointer down one slot
+ sw $t0, 0($t2) # store the slot to the destination
+ bne $sp, $t2, .Losr_loop_entry # loop until the slot at sp has been written
+ daddiu $t2, $t2, -4 # Delay slot: step the destination pointer down one slot
+
+.Losr_loop_exit:
+ move $t9, $a2 # t9 = pc to call
+ jalr $zero, $t9 # Jump to the OSR entry point.
+ nop # Delay slot: nothing to do
+END art_quick_osr_stub
+
+ /*
* On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
* FIXME: just guessing about the shape of the jmpbuf. Where will pc be?
*/
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 6496afd..8e92885 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -319,11 +319,6 @@
return false;
}
- if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
- VLOG(jit) << "OSR not supported on this platform: " << kRuntimeISA;
- return false;
- }
-
if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
// Don't attempt to do an OSR if we are close to the stack limit. Since
// the interpreter frames are still on stack, OSR has the potential