Implement on-stack replacement for MIPS32 and MIPS64

Change-Id: I4e589f0597b597adff95e1289f20deb2eab97e9b
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index fd1851f..3c0e452 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -319,6 +319,111 @@
 .endm
 
     /*
+     * On stack replacement stub: copies the given stack below sp and jumps to the given pc.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack (in bytes; used as the memcpy length)
+     *   a2 = pc to call
+     *   a3 = JValue* result (receives v0/v1, or f0/f1 when shorty[0] is 'D' or 'F')
+     *   [sp + 16] = shorty (only its first character is read)
+     *   [sp + 20] = thread (loaded into rSELF)
+     */
+ENTRY art_quick_osr_stub
+    // Save callee general purpose registers, RA and GP.
+    addiu  $sp, $sp, -48                   # Allocate the 48-byte register save area
+    .cfi_adjust_cfa_offset 48
+    sw     $ra, 44($sp)
+    .cfi_rel_offset 31, 44
+    sw     $s8, 40($sp)
+    .cfi_rel_offset 30, 40
+    sw     $gp, 36($sp)
+    .cfi_rel_offset 28, 36
+    sw     $s7, 32($sp)
+    .cfi_rel_offset 23, 32
+    sw     $s6, 28($sp)
+    .cfi_rel_offset 22, 28
+    sw     $s5, 24($sp)
+    .cfi_rel_offset 21, 24
+    sw     $s4, 20($sp)
+    .cfi_rel_offset 20, 20
+    sw     $s3, 16($sp)
+    .cfi_rel_offset 19, 16
+    sw     $s2, 12($sp)
+    .cfi_rel_offset 18, 12
+    sw     $s1, 8($sp)
+    .cfi_rel_offset 17, 8
+    sw     $s0, 4($sp)
+    .cfi_rel_offset 16, 4
+
+    move   $s8, $sp                        # Save the stack pointer
+    move   $s7, $a1                        # Save size of stack (read again at .Losr_entry)
+    move   $s6, $a2                        # Save the pc to call (read again at .Losr_entry)
+    lw     rSELF, 48+20($sp)               # Save managed thread pointer into rSELF
+    addiu  $t0, $sp, -12                   # Reserve space for stack pointer,
+                                           #    JValue* result, and ArtMethod* slot.
+    srl    $t0, $t0, 4                     # Align stack pointer to 16 bytes
+    sll    $sp, $t0, 4                     # Update stack pointer
+    sw     $s8, 4($sp)                     # Save old stack pointer
+    sw     $a3, 8($sp)                     # Save JValue* result
+    sw     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
+    subu   $sp, $a1                        # Reserve space for callee stack
+    move   $a2, $a1                        # memcpy length = size of stack
+    move   $a1, $a0                        # memcpy source = stack to copy
+    move   $a0, $sp                        # memcpy destination = space reserved above
+    la     $t9, memcpy                     # t9 = address of memcpy
+    jalr   $t9                             # memcpy (dest a0, src a1, bytes a2)
+    addiu  $sp, $sp, -16                   # make space for argument slots for memcpy (delay slot)
+    bal    .Losr_entry                     # Call the method; RA = instruction after the delay slot
+    addiu  $sp, $sp, 16                    # restore stack after memcpy (delay slot)
+    lw     $a2, 8($sp)                     # Restore JValue* result (sp presumably back at ArtMethod* slot)
+    lw     $sp, 4($sp)                     # Restore saved stack pointer
+    lw     $a0, 48+16($sp)                 # load shorty
+    lbu    $a0, 0($a0)                     # load return type
+    li     $a1, 'D'                        # put char 'D' into a1
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'D'
+    li     $a1, 'F'                        # put char 'F' into a1 (delay slot; runs on both paths)
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'F'
+    nop                                    # delay slot
+    sw     $v0, 0($a2)                     # store v0 at offset 0 of the result
+    b      .Losr_exit
+    sw     $v1, 4($a2)                     # store v0/v1 into result (delay slot)
+.Losr_fp_result:
+    SDu    $f0, $f1, 0, $a2, $t0           # store f0/f1 into result
+.Losr_exit:
+    lw     $ra, 44($sp)
+    .cfi_restore 31
+    lw     $s8, 40($sp)
+    .cfi_restore 30
+    lw     $gp, 36($sp)
+    .cfi_restore 28
+    lw     $s7, 32($sp)
+    .cfi_restore 23
+    lw     $s6, 28($sp)
+    .cfi_restore 22
+    lw     $s5, 24($sp)
+    .cfi_restore 21
+    lw     $s4, 20($sp)
+    .cfi_restore 20
+    lw     $s3, 16($sp)
+    .cfi_restore 19
+    lw     $s2, 12($sp)
+    .cfi_restore 18
+    lw     $s1, 8($sp)
+    .cfi_restore 17
+    lw     $s0, 4($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra                      # Return to the caller
+    addiu  $sp, $sp, 48                    # Release the 48-byte save area (delay slot)
+    .cfi_adjust_cfa_offset -48
+.Losr_entry:
+    addiu  $s7, $s7, -4                    # s7 = size of stack - 4
+    addu   $t0, $s7, $sp                   # t0 = address of the last word of the copied stack
+    move   $t9, $s6                        # t9 = pc to call
+    jalr   $zero, $t9                      # Jump to the pc without linking
+    sw     $ra, 0($t0)                     # Store RA per the compiler ABI (delay slot)
+END art_quick_osr_stub
+
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index f1e605a..f31b92a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -358,6 +358,138 @@
 .endm
 
     /*
+     * On stack replacement stub: copies the given stack below sp and jumps to the given pc.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack (in bytes, including the 8-byte RA slot at its top)
+     *   a2 = pc to call
+     *   a3 = JValue* result (receives v0, or f0 when shorty[0] is 'D' or 'F', as two words)
+     *   a4 = shorty (only its first character is read)
+     *   a5 = thread (copied into rSELF)
+     */
+ENTRY art_quick_osr_stub
+    move   $t0, $sp               # save stack pointer
+    daddiu $t1, $sp, -112         # reserve stack space
+    dsrl   $t1, $t1, 4            # enforce 16 byte stack alignment
+    dsll   $sp, $t1, 4            # update stack pointer
+
+    // Save callee general purpose registers, SP, T8(GP), RA, A3, and A4 (8x14 bytes)
+    sd     $ra, 104($sp)
+    .cfi_rel_offset 31, 104
+    sd     $s8, 96($sp)
+    .cfi_rel_offset 30, 96
+    sd     $t0, 88($sp)           # save original stack pointer stored in t0
+    .cfi_rel_offset 29, 88
+    sd     $t8, 80($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 80        # Value from gp is pushed, so set the cfi offset accordingly.
+    sd     $s7, 72($sp)
+    .cfi_rel_offset 23, 72
+    sd     $s6, 64($sp)
+    .cfi_rel_offset 22, 64
+    sd     $s5, 56($sp)
+    .cfi_rel_offset 21, 56
+    sd     $s4, 48($sp)
+    .cfi_rel_offset 20, 48
+    sd     $s3, 40($sp)
+    .cfi_rel_offset 19, 40
+    sd     $s2, 32($sp)
+    .cfi_rel_offset 18, 32
+    sd     $s1, 24($sp)
+    .cfi_rel_offset 17, 24
+    sd     $s0, 16($sp)
+    .cfi_rel_offset 16, 16
+    sd     $a4, 8($sp)                     # Save shorty; reloaded after the call
+    .cfi_rel_offset 8, 8
+    sd     $a3, 0($sp)                     # Save JValue* result; reloaded after the call
+    .cfi_rel_offset 7, 0
+    move   rSELF, $a5                      # Save managed thread pointer into rSELF
+
+    daddiu $sp, $sp, -16                   # Reserve 16 bytes; the ArtMethod* slot is 0($sp)
+    jal    .Losr_entry                     # Call the method; RA = instruction after the delay slot
+    sd     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame (delay slot)
+    daddiu $sp, $sp, 16                    # Drop the 16 bytes reserved before the call
+
+    // Restore return value address and shorty address
+    ld     $a4, 8($sp)                     # shorty address
+    .cfi_restore 8
+    ld     $a3, 0($sp)                     # result value address
+    .cfi_restore 7
+
+    lbu    $t1, 0($a4)                     # load return type
+    li     $t2, 'D'                        # put char 'D' into t2
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'D'
+    li     $t2, 'F'                        # put char 'F' into t2 (delay slot; runs on both paths)
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'F'
+    nop                                    # delay slot
+    b      .Losr_exit
+    dsrl   $v1, $v0, 32                    # put high half of result in v1 (delay slot)
+.Losr_fp_result:
+    mfc1   $v0, $f0                        # put low half of FP result in v0
+    mfhc1  $v1, $f0                        # put high half of FP result in v1
+.Losr_exit:
+    sw     $v0, 0($a3)                     # store low half of result (low word at lower address)
+    sw     $v1, 4($a3)                     # store high half of result
+
+    // Restore callee registers
+    ld     $ra, 104($sp)
+    .cfi_restore 31
+    ld     $s8, 96($sp)
+    .cfi_restore 30
+    ld     $t0, 88($sp)                    # reload original SP into t0; applied in the return delay slot
+    .cfi_restore 29
+    ld     $t8, 80($sp)                    # Restore gp back to its temp storage.
+    .cfi_restore 28
+    ld     $s7, 72($sp)
+    .cfi_restore 23
+    ld     $s6, 64($sp)
+    .cfi_restore 22
+    ld     $s5, 56($sp)
+    .cfi_restore 21
+    ld     $s4, 48($sp)
+    .cfi_restore 20
+    ld     $s3, 40($sp)
+    .cfi_restore 19
+    ld     $s2, 32($sp)
+    .cfi_restore 18
+    ld     $s1, 24($sp)
+    .cfi_restore 17
+    ld     $s0, 16($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra                      # Return to the caller
+    move   $sp, $t0                        # Restore the original stack pointer (delay slot)
+
+.Losr_entry:
+    dsubu  $sp, $sp, $a1                   # Reserve space for callee stack
+    daddiu $a1, $a1, -8                    # a1 = size of stack minus the 8-byte RA slot
+    daddu  $t0, $a1, $sp                   # t0 = address of the RA slot at the top of the callee stack
+    sw     $ra, 0($t0)                     # Store low half of RA per compiler ABI
+    dsrl   $t1, $ra, 32                    # t1 = high 32 bits of RA
+    sw     $t1, 4($t0)                     # Store high half of RA per compiler ABI
+
+    // Copy arguments into callee stack
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // a0 = source address
+    // a1 = args length in bytes (does not include 8 bytes for RA)
+    // sp = destination address
+    beqz   $a1, .Losr_loop_exit            # Nothing to copy when the length is zero
+    daddiu $a1, $a1, -4                    # a1 = offset of the last word (delay slot; a1 unused after exit)
+    daddu  $t1, $a0, $a1                   # t1 = address of the last source word
+    daddu  $t2, $sp, $a1                   # t2 = address of the last destination word
+.Losr_loop_entry:
+    lw     $t0, 0($t1)                     # Load one word from the source
+    daddiu $t1, $t1, -4                    # Step the source pointer down one word
+    sw     $t0, 0($t2)                     # Store the word to the destination
+    bne    $sp, $t2, .Losr_loop_entry      # Loop until the word at sp has been written
+    daddiu $t2, $t2, -4                    # Step the destination pointer down (delay slot)
+
+.Losr_loop_exit:
+    move   $t9, $a2                        # t9 = pc to call
+    jalr   $zero, $t9                      # Jump to the OSR entry point.
+    nop                                    # delay slot
+END art_quick_osr_stub
+
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 6496afd..8e92885 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -319,11 +319,6 @@
     return false;
   }
 
-  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
-    VLOG(jit) << "OSR not supported on this platform: " << kRuntimeISA;
-    return false;
-  }
-
   if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
     // Don't attempt to do an OSR if we are close to the stack limit. Since
     // the interpreter frames are still on stack, OSR has the potential