x86: restore standard stack frame layout and fix stack alignment.

Native x86 code can contain SSE instructions, which require a 16-byte aligned
stack; this is also what GCC expects these days. Enforce this alignment in
dvmPlatformInvoke and in mterp.

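Also restore the standard stack frame layout in dvmMterpStdRun (saved %ebp at
0(%ebp), return address at 4(%ebp), first argument at 8(%ebp)), so debuggers
can produce backtraces across dvmMterpStdRun invocations.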

Change-Id: I43d00a6bf8210b7d3aa9276edabc08978084e4f2
diff --git a/vm/arch/x86/Call386ABI.S b/vm/arch/x86/Call386ABI.S
index c98876c..3722bfa 100644
--- a/vm/arch/x86/Call386ABI.S
+++ b/vm/arch/x86/Call386ABI.S
@@ -97,9 +97,11 @@
     movl     16(%ebp),%ebx
     testl    %ebx,%ebx
     js       dvmAbort
-/* Get the size of the variable region and grow (preserving alignment) */
+/* Get the size of the variable region, add two more slots for the first
+ * two arguments and grow (preserving alignment)
+ */
     movl     %ebx,%ecx
-    leal     12(,%ecx,4),%ecx
+    leal     20(,%ecx,4),%ecx
     andl     $0x0003FFF0,%ecx
     subl     %ecx,%esp
 /* Handle this/class */
@@ -111,8 +113,8 @@
     movl     (%esi),%eax
     addl     $4,%esi
 isClass:
-    pushl    %eax
-    pushl    %ecx
+    movl     %eax,4(%esp)
+    movl     %ecx,0(%esp)
 /* Now, copy the variable arguments region */
     movl     %ebx,%ecx
     andl     $0x0000FFFF,%ecx
diff --git a/vm/mterp/out/InterpAsm-x86.S b/vm/mterp/out/InterpAsm-x86.S
index b2bcd08..46394f1 100644
--- a/vm/mterp/out/InterpAsm-x86.S
+++ b/vm/mterp/out/InterpAsm-x86.S
@@ -46,8 +46,7 @@
 
 Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
 
-Alignment of stack not strictly required, but should be for performance.  We'll
-align frame sizes to 16-byte multiples.
+Stack must be 16-byte aligned to support SSE in native code.
 
 If we're not doing variable stack allocation (alloca), the frame pointer can be
 eliminated and all arg references adjusted to be esp relative.
@@ -74,7 +73,7 @@
 
 */
 
-#define rGLUE    (%ebp)
+#define rGLUE    8(%ebp)
 #define rPC      %esi
 #define rFP      %edi
 #define rINST    %ebx
@@ -84,14 +83,13 @@
 
 
 /* Frame diagram while executing dvmMterpStdRun, high to low addresses */
-#define IN_ARG0        ( 12)
-#define CALLER_RP      (  8)
-#define PREV_FP        (  4)
-#define rGLUE_SPILL    (  0) /* <- dvmMterpStdRun ebp */
+#define IN_ARG0        (  8)
+#define CALLER_RP      (  4)
+#define PREV_FP        (  0) /* <- dvmMterpStdRun ebp */
 /* Spill offsets relative to %ebp */
 #define EDI_SPILL      ( -4)
 #define ESI_SPILL      ( -8)
-#define EBX_SPILL      (-12) /* <- esp following dmMterpStdRun header */
+#define EBX_SPILL      (-12)
 #define rPC_SPILL      (-16)
 #define rFP_SPILL      (-20)
 #define rINST_SPILL    (-24)
@@ -102,13 +100,13 @@
 #define LOCAL1_OFFSET  (-44)
 #define LOCAL2_OFFSET  (-48)
 #define LOCAL3_OFFSET  (-52)
-/* Out Arg offsets, relative to %sp */
+/* Out Arg offsets, relative to %esp */
 #define OUT_ARG4       ( 16)
 #define OUT_ARG3       ( 12)
 #define OUT_ARG2       (  8)
 #define OUT_ARG1       (  4)
 #define OUT_ARG0       (  0)  /* <- dvmMterpStdRun esp */
-#define FRAME_SIZE     80
+#define FRAME_SIZE     76
 
 #define SPILL(reg) movl reg##,reg##_SPILL(%ebp)
 #define UNSPILL(reg) movl reg##_SPILL(%ebp),reg
@@ -8753,17 +8751,17 @@
  *
  */
 dvmMterpStdRun:
-    movl    4(%esp), %ecx        # get incoming rGLUE
     push    %ebp                 # save caller base pointer
-    push    %ecx                 # save rGLUE at (%ebp)
     movl    %esp, %ebp           # set our %ebp
+    movl    rGLUE, %ecx          # get incoming rGLUE
+
 /*
- * At this point we've allocated two slots on the stack
+ * At this point we've allocated one slot on the stack
  * via push and stack is 8-byte aligned.  Allocate space
- * for 8 spill slots, 3 local slots, 5 arg slots + 2 slots for
- * padding to bring us to 16-byte alignment
+ * for 9 spill slots, 4 local slots, 5 arg slots to bring
+ * us to 16-byte alignment
  */
-    subl    $(FRAME_SIZE-8), %esp
+    subl    $(FRAME_SIZE-4), %esp
 
 /* Spill callee save regs */
     movl    %edi,EDI_SPILL(%ebp)
@@ -8824,12 +8822,12 @@
     movl    8(%esp),%eax                 # changeInterp to return reg
     movl    offGlue_bailPtr(%ecx),%esp   # Restore "setjmp" esp
     movl    %esp,%ebp
-    addl    $(FRAME_SIZE-8), %ebp       # Restore %ebp at point of setjmp
+    addl    $(FRAME_SIZE-4), %ebp        # Restore %ebp at point of setjmp
     movl    EDI_SPILL(%ebp),%edi
     movl    ESI_SPILL(%ebp),%esi
     movl    EBX_SPILL(%ebp),%ebx
-    movl    PREV_FP(%ebp),%ebp           # restore caller's ebp
-    addl    $FRAME_SIZE,%esp                    # strip frame
+    movl    %ebp, %esp                   # strip frame
+    pop     %ebp                         # restore caller's ebp
     ret                                  # return to dvmMterpStdRun's caller
 
 
@@ -9328,24 +9326,23 @@
 
 .LinvokeNative:
     movl        rGLUE,%ecx              # %ecx<- pMterpGlue
-    movl        %eax, OUT_ARG1(%esp)    # push parameter methodToCall
+    movl        %eax, OUT_ARG2(%esp)    # push parameter methodToCall
     movl        offGlue_self(%ecx), %ecx        # %ecx<- glue->self
     movl        offThread_jniLocal_topCookie(%ecx), %eax # %eax<- self->localRef->...
     movl        %eax, offStackSaveArea_localRefCookie(%edx) # newSaveArea->localRefCookie<- top
-    movl        %edx, OUT_ARG4(%esp)    # save newSaveArea
+    movl        %edx, LOCAL3_OFFSET(%ebp)    # save newSaveArea
     movl        LOCAL1_OFFSET(%ebp), %edx # %edx<- newFP
     movl        %edx, offThread_curFrame(%ecx)  # glue->self->curFrame<- newFP
-    movl        %ecx, OUT_ARG3(%esp)    # save glue->self
-    movl        %ecx, OUT_ARG2(%esp)    # push parameter glue->self
+    movl        %ecx, OUT_ARG3(%esp)    # push parameter glue->self
     movl        rGLUE,%ecx              # %ecx<- pMterpGlue
-    movl        OUT_ARG1(%esp), %eax    # %eax<- methodToCall
+    movl        OUT_ARG2(%esp), %eax    # %eax<- methodToCall
     lea         offGlue_retval(%ecx), %ecx # %ecx<- &retval
-    movl        %ecx, OUT_ARG0(%esp)    # push parameter pMterpGlue
-    push        %edx                    # push parameter newFP
+    movl        %ecx, OUT_ARG1(%esp)    # push parameter &retval
+    movl        %edx, OUT_ARG0(%esp)    # push parameter newFP
 
     call        *offMethod_nativeFunc(%eax) # call methodToCall->nativeFunc
-    lea         4(%esp), %esp
-    movl        OUT_ARG4(%esp), %ecx    # %ecx<- newSaveArea
+
+    movl        LOCAL3_OFFSET(%ebp), %ecx    # %ecx<- newSaveArea
     movl        OUT_ARG3(%esp), %eax    # %eax<- glue->self
     movl        offStackSaveArea_localRefCookie(%ecx), %edx # %edx<- old top
     cmp         $0, offThread_exception(%eax) # check for exception
diff --git a/vm/mterp/x86/entry.S b/vm/mterp/x86/entry.S
index 27ef51c..3b5c024 100644
--- a/vm/mterp/x86/entry.S
+++ b/vm/mterp/x86/entry.S
@@ -25,17 +25,17 @@
  *
  */
 dvmMterpStdRun:
-    movl    4(%esp), %ecx        # get incoming rGLUE
     push    %ebp                 # save caller base pointer
-    push    %ecx                 # save rGLUE at (%ebp)
     movl    %esp, %ebp           # set our %ebp
+    movl    rGLUE, %ecx          # get incoming rGLUE
+
 /*
- * At this point we've allocated two slots on the stack
+ * At this point we've allocated one slot on the stack
  * via push and stack is 8-byte aligned.  Allocate space
- * for 8 spill slots, 3 local slots, 5 arg slots + 2 slots for
- * padding to bring us to 16-byte alignment
+ * for 9 spill slots, 4 local slots, 5 arg slots to bring
+ * us to 16-byte alignment
  */
-    subl    $$(FRAME_SIZE-8), %esp
+    subl    $$(FRAME_SIZE-4), %esp
 
 /* Spill callee save regs */
     movl    %edi,EDI_SPILL(%ebp)
@@ -96,12 +96,12 @@
     movl    8(%esp),%eax                 # changeInterp to return reg
     movl    offGlue_bailPtr(%ecx),%esp   # Restore "setjmp" esp
     movl    %esp,%ebp
-    addl    $$(FRAME_SIZE-8), %ebp       # Restore %ebp at point of setjmp
+    addl    $$(FRAME_SIZE-4), %ebp        # Restore %ebp at point of setjmp
     movl    EDI_SPILL(%ebp),%edi
     movl    ESI_SPILL(%ebp),%esi
     movl    EBX_SPILL(%ebp),%ebx
-    movl    PREV_FP(%ebp),%ebp           # restore caller's ebp
-    addl    $$FRAME_SIZE,%esp                    # strip frame
+    movl    %ebp, %esp                   # strip frame
+    pop     %ebp                         # restore caller's ebp
     ret                                  # return to dvmMterpStdRun's caller
 
 
diff --git a/vm/mterp/x86/footer.S b/vm/mterp/x86/footer.S
index 6e2c5bd..ef7f35a 100644
--- a/vm/mterp/x86/footer.S
+++ b/vm/mterp/x86/footer.S
@@ -218,24 +218,23 @@
 
 .LinvokeNative:
     movl        rGLUE,%ecx              # %ecx<- pMterpGlue
-    movl        %eax, OUT_ARG1(%esp)    # push parameter methodToCall
+    movl        %eax, OUT_ARG2(%esp)    # push parameter methodToCall
     movl        offGlue_self(%ecx), %ecx        # %ecx<- glue->self
     movl        offThread_jniLocal_topCookie(%ecx), %eax # %eax<- self->localRef->...
     movl        %eax, offStackSaveArea_localRefCookie(%edx) # newSaveArea->localRefCookie<- top
-    movl        %edx, OUT_ARG4(%esp)    # save newSaveArea
+    movl        %edx, LOCAL3_OFFSET(%ebp)    # save newSaveArea
     movl        LOCAL1_OFFSET(%ebp), %edx # %edx<- newFP
     movl        %edx, offThread_curFrame(%ecx)  # glue->self->curFrame<- newFP
-    movl        %ecx, OUT_ARG3(%esp)    # save glue->self
-    movl        %ecx, OUT_ARG2(%esp)    # push parameter glue->self
+    movl        %ecx, OUT_ARG3(%esp)    # push parameter glue->self
     movl        rGLUE,%ecx              # %ecx<- pMterpGlue
-    movl        OUT_ARG1(%esp), %eax    # %eax<- methodToCall
+    movl        OUT_ARG2(%esp), %eax    # %eax<- methodToCall
     lea         offGlue_retval(%ecx), %ecx # %ecx<- &retval
-    movl        %ecx, OUT_ARG0(%esp)    # push parameter pMterpGlue
-    push        %edx                    # push parameter newFP
+    movl        %ecx, OUT_ARG1(%esp)    # push parameter &retval
+    movl        %edx, OUT_ARG0(%esp)    # push parameter newFP
 
     call        *offMethod_nativeFunc(%eax) # call methodToCall->nativeFunc
-    lea         4(%esp), %esp
-    movl        OUT_ARG4(%esp), %ecx    # %ecx<- newSaveArea
+
+    movl        LOCAL3_OFFSET(%ebp), %ecx    # %ecx<- newSaveArea
     movl        OUT_ARG3(%esp), %eax    # %eax<- glue->self
     movl        offStackSaveArea_localRefCookie(%ecx), %edx # %edx<- old top
     cmp         $$0, offThread_exception(%eax) # check for exception
diff --git a/vm/mterp/x86/header.S b/vm/mterp/x86/header.S
index cb2ddf8..de16f0c 100644
--- a/vm/mterp/x86/header.S
+++ b/vm/mterp/x86/header.S
@@ -39,8 +39,7 @@
 
 Once past the prologue, arguments are referenced at ((argno + 2)*4)(%ebp)
 
-Alignment of stack not strictly required, but should be for performance.  We'll
-align frame sizes to 16-byte multiples.
+Stack must be 16-byte aligned to support SSE in native code.
 
 If we're not doing variable stack allocation (alloca), the frame pointer can be
 eliminated and all arg references adjusted to be esp relative.
@@ -67,7 +66,7 @@
 
 */
 
-#define rGLUE    (%ebp)
+#define rGLUE    8(%ebp)
 #define rPC      %esi
 #define rFP      %edi
 #define rINST    %ebx
@@ -77,14 +76,13 @@
 
 
 /* Frame diagram while executing dvmMterpStdRun, high to low addresses */
-#define IN_ARG0        ( 12)
-#define CALLER_RP      (  8)
-#define PREV_FP        (  4)
-#define rGLUE_SPILL    (  0) /* <- dvmMterpStdRun ebp */
+#define IN_ARG0        (  8)
+#define CALLER_RP      (  4)
+#define PREV_FP        (  0) /* <- dvmMterpStdRun ebp */
 /* Spill offsets relative to %ebp */
 #define EDI_SPILL      ( -4)
 #define ESI_SPILL      ( -8)
-#define EBX_SPILL      (-12) /* <- esp following dmMterpStdRun header */
+#define EBX_SPILL      (-12)
 #define rPC_SPILL      (-16)
 #define rFP_SPILL      (-20)
 #define rINST_SPILL    (-24)
@@ -95,13 +93,13 @@
 #define LOCAL1_OFFSET  (-44)
 #define LOCAL2_OFFSET  (-48)
 #define LOCAL3_OFFSET  (-52)
-/* Out Arg offsets, relative to %sp */
+/* Out Arg offsets, relative to %esp */
 #define OUT_ARG4       ( 16)
 #define OUT_ARG3       ( 12)
 #define OUT_ARG2       (  8)
 #define OUT_ARG1       (  4)
 #define OUT_ARG0       (  0)  /* <- dvmMterpStdRun esp */
-#define FRAME_SIZE     80
+#define FRAME_SIZE     76
 
 #define SPILL(reg) movl reg##,reg##_SPILL(%ebp)
 #define UNSPILL(reg) movl reg##_SPILL(%ebp),reg