ART: Generic JNI for x86

Add the generic JNI trampoline for x86. Small cleanup in the C code.

Change-Id: Icaf9de7c0e5e8d1e6cb1135a54552040344cc5a3
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 07268ea..989ecf9 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1152,8 +1152,92 @@
 END_FUNCTION art_quick_resolution_trampoline
 
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
-    int3
-    int3
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    // This also stores the native ArtMethod reference at the bottom of the stack.
+
+    movl %esp, %ebp                 // ebp = SP at the callee-save frame (passed to C as "sp").
+    movl %esp, %edi                 // edi = same SP; CFA base, and used to tear down the alloca.
+    CFI_DEF_CFA_REGISTER(edi)
+    subl LITERAL(5120), %esp        // Reserve the scratch area referred to below as the alloca.
+    // prepare for artQuickGenericJniTrampoline call
+    // (Thread*,  SP)
+    //  (esp)    4(esp)   <= C calling convention
+    //  fs:...    ebp     <= where they are
+    // Also: PLT, so need GOT in ebx.
+
+    subl LITERAL(8), %esp         // Padding for 16B alignment.
+    pushl %ebp                    // Pass SP (to ArtMethod).
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
+    SETUP_GOT_NOSAVE              // Clobbers ebx.
+    call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
+    // Drop call stack (two arguments plus the 8 bytes of padding).
+    addl LITERAL(16), %esp
+
+    // At the bottom of the alloca we now have the pointer to the method (= bottom of the
+    // callee-save frame). Pop it to get the adjusted frame pointer.
+    popl %ebp
+
+    // Check for error, negative value.
+    testl %eax, %eax
+    js .Lentry_error
+
+    // Release part of the alloca (eax = byte count returned by the C code), get the code pointer.
+    addl %eax, %esp
+    popl %eax
+
+    // On x86 there are no registers passed, so nothing to pop here.
+
+    // Native call.
+    call *%eax
+
+    // Pop the native stack, but keep the space that was reserved for the cookie.
+    movl %ebp, %esp
+    subl LITERAL(16), %esp        // Alignment.
+
+    // result sign extension is handled in C code
+    // prepare for artQuickGenericJniEndTrampoline call
+    // (Thread*,  SP,  result, result_f)
+    //  (esp)   4(esp)  8(esp)  16(esp)    <= C calling convention
+    //  fs:...    ebp  eax:edx   st(0)     <= where they are
+    // NOTE(review): fstpl widens a native float to double; confirm the C side narrows it back.
+    subl LITERAL(8), %esp         // Pass float result: the x86 C ABI returns FP values in st(0).
+    fstpl (%esp)                  // Store it as a 64-bit double and pop the x87 stack.
+    pushl %edx                    // Pass int result.
+    pushl %eax
+    pushl %ebp                    // Pass SP (to ArtMethod).
+    pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
+    call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
+
+    // Tear down the alloca.
+    movl %edi, %esp
+    CFI_DEF_CFA_REGISTER(esp)
+
+    // Pending exceptions possible.
+    movl %fs:THREAD_EXCEPTION_OFFSET, %ebx
+    testl %ebx, %ebx
+    jnz .Lexception_in_native
+
+    // Tear down the callee-save frame.
+    addl LITERAL(4), %esp           // Remove padding
+    CFI_ADJUST_CFA_OFFSET(-4)
+    POP ecx
+    addl LITERAL(4), %esp           // Avoid edx, as it may be part of the result.
+    CFI_ADJUST_CFA_OFFSET(-4)
+    POP ebx
+    POP ebp  // Restore callee saves
+    POP esi
+    POP edi
+    // Also copy the edx:eax result into xmm0, for when the caller expects an FP return.
+    movd %eax, %xmm0
+    movd %edx, %xmm1
+    punpckldq %xmm1, %xmm0        // xmm0[63:0] = edx:eax
+    ret
+.Lentry_error:
+    movl %edi, %esp               // Drop the alloca; esp is back at the callee-save frame.
+    CFI_DEF_CFA_REGISTER(esp)
+.Lexception_in_native:
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_generic_jni_trampoline
 
 DEFINE_FUNCTION art_quick_to_interpreter_bridge
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5374f22..e986c6a 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1202,10 +1202,8 @@
     size_t scope_and_method = handle_scope_size + sizeof(StackReference<mirror::ArtMethod>);
 
     sp8 -= scope_and_method;
-    // Align by kStackAlignment
-    uintptr_t sp_to_align = reinterpret_cast<uintptr_t>(sp8);
-    sp_to_align = RoundDown(sp_to_align, kStackAlignment);
-    sp8 = reinterpret_cast<uint8_t*>(sp_to_align);
+    // Align by kStackAlignment.
+    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
 
     uint8_t* sp8_table = sp8 + sizeof(StackReference<mirror::ArtMethod>);
     *table = reinterpret_cast<HandleScope*>(sp8_table);
@@ -1225,9 +1223,8 @@
 
     // Next comes the native call stack.
     sp8 -= GetStackSize();
-    // Now align the call stack below. This aligns by 16, as AArch64 seems to require.
-    uintptr_t mask = ~0x0F;
-    sp8 = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(sp8) & mask);
+    // Align by kStackAlignment.
+    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
     *start_stack = reinterpret_cast<uintptr_t*>(sp8);
 
     // put fprs and gprs below