/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* JNI method invocation. This is used to call a C/C++ JNI method. The
* argument list has to be pushed onto the native stack according to
* local calling conventions.
*
* This version supports the "new" ARM EABI.
*/
#include <machine/cpu-features.h>
#ifdef __ARM_EABI__
#ifdef EXTENDED_EABI_DEBUG
# define DBG
#else
# define DBG @
#endif
/*
Function prototype:
void dvmPlatformInvoke(void* pEnv, ClassObject* clazz, int argInfo, int argc,
const u4* argv, const char* signature, void* func, JValue* pReturn)
The method we are calling has the form:
return_type func(JNIEnv* pEnv, ClassObject* clazz, ...)
-or-
return_type func(JNIEnv* pEnv, Object* this, ...)
We receive a collection of 32-bit values which correspond to arguments from
the interpreter (e.g. float occupies one, double occupies two). It's up to
us to convert these into local calling conventions.
*/
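/*
A rough C sketch (illustrative only; "pack_args" and the local u4 typedef are
not part of Dalvik) of the argv layout described above. Note there is no pad
slot before the double even though the native EABI will want one -- closing
that gap is exactly this stub's job.

    #include <stdint.h>
    #include <string.h>

    typedef uint32_t u4;

    // Flatten (float, double, int) into 32-bit slots the way the
    // interpreter hands them to us.
    static int pack_args(u4* argv, float f, double d, int i)
    {
        int n = 0;
        memcpy(&argv[n], &f, sizeof(f)); n += 1;  // float: one slot
        memcpy(&argv[n], &d, sizeof(d)); n += 2;  // double: two slots,
                                                  // starting at slot 1 with
                                                  // no alignment padding
        argv[n++] = (u4) i;                       // int: one slot
        return n;                                 // 4 slots total
    }
*/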
/*
ARM EABI notes:
r0-r3 hold first 4 args to a method
r9 is given special treatment in some situations, but not for us
r10 (sl) seems to be generally available
r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
r12 (ip) is scratch -- not preserved across method calls
r13 (sp) should be managed carefully in case a signal arrives
r14 (lr) must be preserved
r15 (pc) can be tinkered with directly
r0 holds returns of <= 4 bytes
r0-r1 hold returns of 8 bytes, low word in r0
Callee must save/restore r4+ (except r12) if it modifies them.
Stack is "full descending". Only the arguments that don't fit in the first 4
registers are placed on the stack. "sp" points at the first stacked argument
(i.e. the 5th arg).
VFP: single-precision results in s0, double-precision results in d0.
In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
64-bit quantities (long long, double) must be 64-bit aligned. This means
we have to scan the method signature, identify arguments that must be
padded, and fix them up appropriately.
*/
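/*
To make the alignment rules concrete, here is a small standalone C example
(illustrative only, not part of this file) and where the base soft-float EABI
-- the convention this stub targets -- places each argument:

    #include <stdio.h>

    long long f(int a, long long b, int c, double d)
    {
        // a -> r0
        // b -> r2:r3     (register pair rounded up to even; r1 is skipped)
        // c -> [sp, #0]
        // d -> [sp, #8]  ([sp, #4] is a pad word for 8-byte alignment)
        return b + a + c + (long long) d;    // result comes back in r0:r1
    }

    int main(void)
    {
        printf("%lld\n", f(1, 2LL, 3, 4.0));
        return 0;
    }

Those skipped registers and stack holes are the padding the rest of this
file is concerned with.
*/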
.text
.align 2
.global dvmPlatformInvoke
.type dvmPlatformInvoke, %function
/*
* On entry:
* r0 JNIEnv (can be left alone)
* r1 clazz (NULL for virtual method calls, non-NULL for static)
* r2 arg info
* r3 argc (number of 32-bit values in argv)
* [sp] argv
* [sp,#4] short signature
* [sp,#8] func
* [sp,#12] pReturn
*
* For a virtual method call, the "this" reference is in argv[0].
*
* argInfo (32-bit int) layout:
* SRRRLLLL FFFFFFFF FFFFFFFF FFFFFFFF
*
* S - if set, do things the hard way (scan the signature)
* R - return-type enumeration, really only important for "hard" FP ABI
* L - number of double-words of storage required on stack (0-15, i.e. up to 30 words)
* F - pad flag -- if set, write a pad word to the stack
*
* With this arrangement we can efficiently push up to 24 words of arguments
* onto the stack. Anything requiring more than that -- which should happen
* rarely to never -- can do the slow signature scan.
*
* (We could pack the Fs more efficiently -- we know we never push two pads
* in a row, and the first word can never be a pad -- but there's really
* no need for it.)
*
* NOTE: if the called function has more than 4 words of arguments, gdb
* will not be able to unwind the stack past this method. The only way
* around this is to convince gdb to respect an explicit frame pointer.
* The stack unwinder in debuggerd *does* pay attention to fp if we set it
* up appropriately, so at least that will work.
*/
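/*
A rough C model of how the code below consumes argInfo (illustrative only;
the struct and field names are made up, not Dalvik's):

    #include <stdint.h>

    typedef uint32_t u4;

    struct ArgInfoView {
        int hard;          // S: set -> scan the signature instead
        int returnType;    // RRR: DalvikJniReturnType value
        int stackDwords;   // LLLL: double-words to reserve on the stack
        u4  padFlags;      // F...F: one bit per arg word, LSB first;
                           //        set -> that word is a pad
    };

    static struct ArgInfoView decodeArgInfo(u4 argInfo)
    {
        struct ArgInfoView v;
        v.hard        = (argInfo >> 31) & 1;
        v.returnType  = (argInfo >> 28) & 7;
        v.stackDwords = (argInfo >> 24) & 0xf;
        v.padFlags    = argInfo & 0x00ffffff;
        return v;
    }

The assembly never builds a struct like this, of course: it checks S with a
sign test, the fast path grabs the return type with "lsr #28" once S is known
clear, scales LLLL straight into a stack adjustment, and shifts the pad flags
out of r2 one bit at a time.
*/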
dvmPlatformInvoke:
.fnstart
/*
* Save regs.
*
* On entry to a function, "sp" must be 64-bit aligned. This means
* we have to adjust sp manually if we push an odd number of regs here
* (both here and when exiting).
*
* The ARM spec doesn't specify anything about the frame pointer. gcc
* points fp at the first saved register, so our "full descending"
* stack looks like:
*
* pReturn
* func
* shorty
* argv <-- sp on entry
* lr <-- fp
* fp
* r9...r7
* r6 <-- sp after reg save
*
* Any arguments that need to be pushed on for the target method
* come after this. The last argument is pushed first.
*/
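/*
Equivalently, in illustrative C (the struct is made up, assuming 32-bit ARM
where every pointer is 4 bytes): once fp is set up, the caller's stacked
arguments sit just above the saved lr, which is what the [fp, #N+FP_ADJ]
loads below index into.

    #include <stdint.h>

    typedef uint32_t u4;
    typedef union JValue JValue;        // opaque here

    struct StackedArgs {                // located at (char*)fp + FP_ADJ
        const u4*   argv;               // [fp, #0+FP_ADJ]
        const char* shorty;             // [fp, #4+FP_ADJ]
        void*       func;               // [fp, #8+FP_ADJ]
        JValue*     pReturn;            // [fp, #12+FP_ADJ]
    };
*/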
SAVED_REG_COUNT = 6 @ push 6 regs
FP_STACK_OFFSET = (SAVED_REG_COUNT-1) * 4 @ offset between fp and post-save sp
FP_ADJ = 4 @ initial sp is fp + 4
.save {r6, r7, r8, r9, fp, lr}
stmfd sp!, {r6, r7, r8, r9, fp, lr}
.setfp fp, sp, #FP_STACK_OFFSET @ point fp at first saved reg
add fp, sp, #FP_STACK_OFFSET
@.pad #4 @ adjust for 64-bit align
@sub sp, sp, #4 @ (if we save odd number of regs)
@ Ensure 64-bit alignment. The EABI guarantees sp was aligned on entry;
@ make sure we're still aligned after the register save.
DBG tst sp, #4 @ 64-bit aligned?
DBG bne dvmAbort @ no, fail
ldr r9, [fp, #0+FP_ADJ] @ r9<- argv
cmp r1, #0 @ calling a static method?
@ Not static, grab the "this" pointer. Note "this" is not explicitly
@ described by the method signature.
subeq r3, r3, #1 @ argc--
ldreq r1, [r9], #4 @ r1<- *argv++
@ Do we have arg padding flags in "argInfo"? (just need to check hi bit)
teq r2, #0
bmi .Lno_arg_info
/*
* "Fast" path.
*
* Make room on the stack for the arguments and copy them over,
* inserting pad words when appropriate.
*
* Currently:
* r0 don't touch
* r1 don't touch
* r2 arg info
* r3 argc
* r4-r5 don't touch (not saved)
* r6-r8 (available)
* r9 argv
* fp frame pointer
*/
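/*
In C terms the fast-path copy below does roughly this (illustrative only;
"fast_copy" is a made-up name). Here src is argv advanced past "this" (if
any) and past the word or two that travel in r2/r3, remaining is argc
adjusted to match, and flags is argInfo's F field with its low two bits
(which describe r2/r3) already shifted out:

    #include <stdint.h>

    typedef uint32_t u4;

    static void fast_copy(u4* dst, const u4* src, int remaining, u4 flags)
    {
        while (remaining-- > 0) {
            while (flags & 1) {     // pad: leave a hole, consume the bit
                dst++;
                flags >>= 1;
            }
            *dst++ = *src++;        // real word: pull it from argv
            flags >>= 1;            // consume this slot's flag bit
        }
    }

The real code keeps the flags in r2 and shifts them into the carry flag one
at a time, but the effect is the same: a set bit means "skip a stack slot",
a clear bit means "copy the next argv word".
*/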
.Lhave_arg_info:
@ Expand the stack by the specified amount. We want to extract the
@ count of double-words from r2, multiply it by 8, and subtract that
@ from the stack pointer.
and ip, r2, #0x0f000000 @ ip<- double-words required
mov r6, r2, lsr #28 @ r6<- return type
sub sp, sp, ip, lsr #21 @ shift right 24, then left 3
mov r8, sp @ r8<- sp (arg copy dest)
@ Stick argv in r7 and advance it past the argv values that will be
@ held in r2-r3. It's possible r3 will hold a pad, so check the
@ bit in r2. We do this by ignoring the first bit (which would
@ indicate a pad in r2) and shifting the second into the carry flag.
@ If the carry is set, r3 will hold a pad, so we adjust argv less.
@
@ (This is harmless if argc==0)
mov r7, r9
movs r2, r2, lsr #2
addcc r7, r7, #8 @ skip past 2 words, for r2 and r3
subcc r3, r3, #2
addcs r7, r7, #4 @ skip past 1 word, for r2
subcs r3, r3, #1
.Lfast_copy_loop:
@ if (--argc < 0) goto invoke
subs r3, r3, #1
bmi .Lcopy_done @ NOTE: expects original argv in r9
.Lfast_copy_loop2:
@ Get pad flag into carry bit. If it's set, we don't pull a value
@ out of argv.
movs r2, r2, lsr #1
ldrcc ip, [r7], #4 @ ip = *r7++ (pull from argv)
strcc ip, [r8], #4 @ *r8++ = ip (write to stack)
bcc .Lfast_copy_loop
DBG movcs ip, #-3 @ DEBUG DEBUG - make pad word obvious
DBG strcs ip, [r8] @ DEBUG DEBUG
add r8, r8, #4 @ if pad, just advance r8 without storing
b .Lfast_copy_loop2 @ don't adjust argc after writing pad
.Lcopy_done:
/*
* Currently:
* r0-r3 args (JNIEnv*, thisOrClass, arg0, arg1)
* r6 return type (enum DalvikJniReturnType)
* r9 original argv
* fp frame pointer
*
* The stack copy is complete. Grab the first two words off of argv
* and tuck them into r2/r3. If the first arg is 32-bit and the second
* arg is 64-bit, then r3 "holds" a pad word and the load is unnecessary
* but harmless.
*
* If there are 0 or 1 arg words in argv, we will be loading uninitialized
* data into the registers, but since nothing tries to use it, it's also
* harmless (assuming argv[0] and argv[1] point to valid memory, which
* is a reasonable assumption for Dalvik's interpreted stacks).
*/
ldmia r9, {r2-r3} @ r2/r3<- argv[0]/argv[1]
ldr ip, [fp, #8+FP_ADJ] @ ip<- func
#ifdef __ARM_HAVE_BLX
blx ip @ call func
#else
mov lr, pc @ call func the old-fashioned way
bx ip
#endif
@ We're back, result is in r0 or (for long/double) r0-r1.
@
@ In theory, we need to use the "return type" arg to figure out what
@ we have and how to return it. However, unless we have an FPU and
@ "hard" fp calling conventions, all we need to do is copy r0-r1 into
@ the JValue union.
@
@ Thought: could redefine DalvikJniReturnType such that single-word
@ and double-word values occupy different ranges; simple comparison
@ allows us to choose between str and stm. Probably not worthwhile.
@
cmp r6, #0 @ DALVIK_JNI_RETURN_VOID?
ldrne ip, [fp, #12+FP_ADJ] @ pReturn
sub sp, fp, #FP_STACK_OFFSET @ restore sp to post-reg-save offset
stmneia ip, {r0-r1} @ pReturn->j <- r0/r1
@ Restore the registers we saved and return. On >= ARMv5TE we can
@ restore PC directly from the saved LR.
#ifdef __ARM_HAVE_PC_INTERWORK
ldmfd sp!, {r6, r7, r8, r9, fp, pc}
#else
ldmfd sp!, {r6, r7, r8, r9, fp, lr}
bx lr
#endif
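/*
The return handling above, as an illustrative C sketch ("JValueSketch"
stands in for Dalvik's JValue; the asm relies on DALVIK_JNI_RETURN_VOID
being zero, as the cmp above suggests):

    #include <stdint.h>
    #include <string.h>

    typedef uint32_t u4;
    typedef union {
        int32_t i; int64_t j; float f; double d; u4 raw[2];
    } JValueSketch;

    static void store_return(int returnType, const u4 r0r1[2],
        JValueSketch* pReturn)
    {
        if (returnType != 0)            // 0 == void, nothing to store
            memcpy(pReturn, r0r1, 8);   // pReturn->j <- r0/r1
    }

Storing both words even for a 4-byte result is harmless: the union is
8 bytes, and copying r0-r1 into it is all the soft-float case needs, as
the comment above notes.
*/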
/*
* "Slow" path.
* Walk through the argument list, counting up the number of 32-bit words
* required to contain it. Then walk through it a second time, copying
* values out to the stack. (We could pre-compute the size to save
* ourselves a trip, but we'd have to store that somewhere -- this is
* sufficiently unlikely that it's not worthwhile.)
*
* Try not to make any assumptions about the number of args -- I think
* the class file format allows up to 64K words (need to verify that).
*
* Currently:
* r0 don't touch
* r1 don't touch
* r2 (available)
* r3 argc
* r4-r5 don't touch (not saved)
* r6-r8 (available)
* r9 argv
* fp frame pointer
*/
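/*
The counting pass below, as an illustrative C sketch ("count_arg_words" is
a made-up name). 'J' (long) and 'D' (double) occupy two words and must
start on an even word index, so a pad word is counted whenever one would
start on an odd index:

    static int count_arg_words(const char* shorty)
    {
        int count = 0;
        const char* sig;
        for (sig = shorty + 1; *sig != '\0'; sig++) {   // skip return type
            count++;                    // first (or only) word of this arg
            if (*sig == 'D' || *sig == 'J') {
                if (count & 1)
                    count++;            // aligned: just add the high word
                else
                    count += 2;         // word just counted becomes a pad
            }
        }
        return count;                   // includes pads and the r2/r3 words
    }
*/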
.Lno_arg_info:
mov ip, r2, lsr #28 @ ip<- return type
ldr r6, [fp, #4+FP_ADJ] @ r6<- short signature
add r6, r6, #1 @ advance past return type
mov r2, #0 @ r2<- word count, init to zero
.Lcount_loop:
ldrb ip, [r6], #1 @ ip<- *signature++
cmp ip, #0 @ end?
beq .Lcount_done @ all done, bail
add r2, r2, #1 @ count++
cmp ip, #'D' @ look for 'D' or 'J', which are 64-bit
cmpne ip, #'J'
bne .Lcount_loop
@ 64-bit value, insert padding if we're not aligned
tst r2, #1 @ odd count after incr? (odd means the 64-bit value is aligned)
addne r2, #1 @ aligned: just add 1 more for the high word
addeq r2, #2 @ misaligned: word just counted becomes the pad, add 2 for the value
b .Lcount_loop
.Lcount_done:
@ We have the padded-out word count in r2. We subtract 2 from it
@ because we don't push the first two arg words on the stack (they're
@ destined for r2/r3). Pushing them on and popping them off would be
@ simpler but slower.
subs r2, r2, #2 @ subtract 2 (for contents of r2/r3)
movmis r2, #0 @ if negative, peg at zero, set Z-flag
beq .Lcopy_done @ zero args, skip stack copy
DBG tst sp, #7 @ DEBUG - make sure sp is aligned now
DBG bne dvmAbort @ DEBUG
@ Set up to copy from r7 to r8. We copy from the second arg to the
@ last arg, which means reading and writing to ascending addresses.
sub sp, sp, r2, asl #2 @ sp<- sp - r2*4
bic sp, #4 @ drop another 4 if needed to keep sp 8-byte aligned
mov r7, r9 @ r7<- argv
mov r8, sp @ r8<- sp
@ We need to copy words from [r7] to [r8]. We walk forward through
@ the signature again, "copying" pad words when appropriate, storing
@ upward into the stack.
ldr r6, [fp, #4+FP_ADJ] @ r6<- signature
add r6, r6, #1 @ advance past return type
add r7, r7, #8 @ r7<- r7+8 (assume argv 0/1 in r2/r3)
@ Eat first arg or two, for the stuff that goes into r2/r3.
ldrb ip, [r6], #1 @ ip<- *signature++
cmp ip, #'D'
cmpne ip, #'J'
beq .Lstack_copy_loop @ 64-bit arg fills r2+r3
@ First arg was 32-bit, check the next
ldrb ip, [r6], #1 @ ip<- *signature++
cmp ip, #'D'
cmpne ip, #'J'
subeq r7, #4 @ r7<- r7-4 (take it back - pad word)
beq .Lstack_copy_loop2 @ start with char we already have
@ Two 32-bit args, fall through and start with next arg
.Lstack_copy_loop:
ldrb ip, [r6], #1 @ ip<- *signature++
.Lstack_copy_loop2:
cmp ip, #0 @ end of shorty?
beq .Lcopy_done @ yes
cmp ip, #'D'
cmpne ip, #'J'
beq .Lcopy64
@ Copy a 32-bit value. r8 starts at the low end of the area we just
@ reserved (the new sp); the stack is "full descending", so we store at
@ [r8] and increment toward higher addresses as we walk the arg list.
.Lcopy32:
ldr ip, [r7], #4
str ip, [r8], #4
b .Lstack_copy_loop
.Lcopy64:
@ Copy a 64-bit value. If necessary, leave a hole in the stack to
@ ensure alignment. We know the [r8] output area is 64-bit aligned,
@ so we can just mask the address.
add r8, r8, #7 @ r8<- (r8+7) & ~7
ldr ip, [r7], #4
bic r8, r8, #7
ldr r2, [r7], #4
str ip, [r8], #4
str r2, [r8], #4
b .Lstack_copy_loop
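/*
And the copy pass above, as an illustrative C sketch ("slow_copy" is a
made-up name). On entry, sig points at the first shorty character whose
value still needs to go onto the stack, src at the corresponding word in
argv, and dst into the 8-byte aligned area reserved on the native stack.

    #include <stdint.h>
    #include <string.h>

    typedef uint32_t u4;

    static void slow_copy(u4* dst, const u4* src, const char* sig)
    {
        for (; *sig != '\0'; sig++) {
            if (*sig == 'D' || *sig == 'J') {
                // round dst up to the next 8-byte boundary, leaving a hole
                dst = (u4*) (((uintptr_t) dst + 7) & ~(uintptr_t) 7);
                memcpy(dst, src, 8);    // two source words, no gap in argv
                dst += 2;
                src += 2;
            } else {
                *dst++ = *src++;        // plain 32-bit word
            }
        }
    }
*/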
.fnend
.size dvmPlatformInvoke, .-dvmPlatformInvoke
#if 0
/*
* Spit out a "we were here", preserving all registers. (The attempt
* to save ip won't work, but we need to save an even number of
* registers for EABI 64-bit stack alignment.)
*/
.macro SQUEAK num
common_squeak\num:
stmfd sp!, {r0, r1, r2, r3, ip, lr}
ldr r0, strSqueak
mov r1, #\num
bl printf
#ifdef __ARM_HAVE_PC_INTERWORK
ldmfd sp!, {r0, r1, r2, r3, ip, pc}
#else
ldmfd sp!, {r0, r1, r2, r3, ip, lr}
bx lr
#endif
.endm
SQUEAK 0
SQUEAK 1
SQUEAK 2
SQUEAK 3
SQUEAK 4
SQUEAK 5
strSqueak:
.word .LstrSqueak
.LstrSqueak:
.asciz "<%d>"
.align 2
#endif
#endif /*__ARM_EABI__*/