blob: a20101078fb2d11d79fcc6ef9f1d8b01a7a97471 [file] [log] [blame]
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace art {
/*
* This source file contains "gen" codegen routines that should
* be applicable to most targets. Only mid-level support utilities
* and "op" calls may be used here.
*/
/*
* x86 targets will likely be different enough to need their own
* invoke gen routines.
*/
#if defined(TARGET_ARM) || defined (TARGET_MIPS)
/*
 * Signature shared by the "next call instruction" callbacks defined
 * below (nextSDCallInsn, nextVCallInsn, nextSuperCallInsn, the *SP
 * variants and the interface variants). The int argument is the current
 * state of the launch sequence; the callbacks in this file return the
 * state to pass on the next call, or -1 when there is nothing left to
 * emit.
 */
typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
uint32_t methodIdx);
/*
* If there are any ins passed in registers that have not been promoted
* to a callee-save register, flush them to the frame. Perform initial
* assignment of promoted arguments.
*/
void flushIns(CompilationUnit* cUnit)
{
    /*
     * The ins occupy the last cUnit->numIns Dalvik virtual registers.
     * The first (rARG3 - rARG1 + 1) of them arrive in rARG1..rARG3; the
     * remainder are read from their frame slots.
     *
     * NOTE: an older version of dx could reuse static method argument
     * registers, so one Dalvik register may be promoted to both a core
     * and an fp register. Both promotions are honoured independently.
     */
    if (cUnit->numIns == 0) {
        return;
    }

    const int firstArgReg = rARG1;
    const int lastArgReg = rARG3;
    const int regArgCount = lastArgReg - firstArgReg + 1;
    const int firstInVReg = cUnit->numDalvikRegisters - cUnit->numIns;

    for (int argIdx = 0; argIdx < cUnit->numIns; argIdx++) {
        const int vReg = firstInVReg + argIdx;
        PromotionMap promo = cUnit->promotionMap[vReg];
        const bool coreIsPromoted = (promo.coreLocation == kLocPhysReg);
        const bool fpIsPromoted = (promo.fpLocation == kLocPhysReg);

        if (argIdx >= regArgCount) {
            // Arrived in the frame: only promoted values need loading.
            if (coreIsPromoted) {
                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
                             promo.coreReg);
            }
            if (fpIsPromoted) {
                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
                             promo.fpReg);
            }
            continue;
        }

        // Arrived in an argument register: seed the promoted copies ...
        const int incomingReg = firstArgReg + argIdx;
        if (coreIsPromoted) {
            genRegCopy(cUnit, promo.coreReg, incomingReg);
        }
        if (fpIsPromoted) {
            genRegCopy(cUnit, promo.fpReg, incomingReg);
        }
        // ... and always write the frame slot, in case the value is only
        // partially promoted.
        storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
                      incomingReg, kWord);
    }
}
/*
* Bit of a hack here - in lieu of a real scheduling pass,
* emit the next instruction in static & direct invoke sequences.
*/
int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
                   int state, uint32_t dexIdx, uint32_t unused)
{
    /*
     * Emits exactly one step of the static/direct launch sequence per
     * call. Returns the state for the following step, or -1 when state
     * is outside 0..3 (nothing is emitted in that case).
     */
    if (state == 0) {
        // Current Method* -> rARG0
        loadCurrMethodDirect(cUnit, rARG0);
    } else if (state == 1) {
        // rARG0 = method->dex_cache_resolved_methods_
        loadWordDisp(cUnit, rARG0,
                     Method::DexCacheResolvedMethodsOffset().Int32Value(),
                     rARG0);
    } else if (state == 2) {
        // rARG0 = target Method*, element dexIdx of the array in rARG0
        loadWordDisp(cUnit, rARG0,
                     Array::DataOffset(sizeof(Object*)).Int32Value() + dexIdx * 4,
                     rARG0);
    } else if (state == 3) {
        // rLINK = code address of the target method
        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                     rLINK);
    } else {
        return -1;
    }
    return state + 1;
}
/*
* Bit of a hack here - in lieu of a real scheduling pass,
* emit the next instruction in a virtual invoke sequence.
* We can use rLR as a temp prior to target address loading
* Note also that we'll load the first argument ("this") into
* rARG1 here rather than the standard loadArgRegs.
*/
int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
                  int state, uint32_t dexIdx, uint32_t methodIdx)
{
    /*
     * Fast path: the target virtual method is fully resolved at compile
     * time, so methodIdx can be used directly as the vtable index.
     * Returns the next state, or -1 when state is outside 0..4.
     */
    if (state == 0) {
        // Load "this" [set rARG1]
        RegLocation thisLoc = oatGetSrc(cUnit, mir, 0);
        loadValueDirectFixed(cUnit, thisLoc, rARG1);
    } else if (state == 1) {
        // Null-check "this" [use rARG1] ...
        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
        // ... then fetch this->klass_ [use rARG1, set rLINK]
        loadWordDisp(cUnit, rARG1, Object::ClassOffset().Int32Value(),
                     rLINK);
    } else if (state == 2) {
        // this->klass_->vtable [use rLINK, set rLINK]
        loadWordDisp(cUnit, rLINK, Class::VTableOffset().Int32Value(),
                     rLINK);
    } else if (state == 3) {
        // Target method = vtable[methodIdx] [use rLINK, set rARG0]
        loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
                     Array::DataOffset(sizeof(Object*)).Int32Value(),
                     rARG0);
    } else if (state == 4) {
        // Compiled code address of the target [use rARG0, set rLINK]
        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                     rLINK);
    } else {
        return -1;
    }
    return state + 1;
}
/*
* Interleave launch code for INVOKE_SUPER. See comments
* for nextVCallInsn.
*/
int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
                      int state, uint32_t dexIdx, uint32_t methodIdx)
{
    /*
     * Fast path: the target is fully resolved at compile time, and the
     * check that methodIdx lies within the super class's vtable is
     * assumed to have been made at compile time as well.
     * Returns the next state, or -1 when state is outside 0..4.
     */
    if (state == 0) {
        // Current Method* [set rARG0]
        loadCurrMethodDirect(cUnit, rARG0);
        // "this" [set rARG1]
        RegLocation thisLoc = oatGetSrc(cUnit, mir, 0);
        loadValueDirectFixed(cUnit, thisLoc, rARG1);
        // method->declaring_class_ [use rARG0, set rLINK]
        loadWordDisp(cUnit, rARG0,
                     Method::DeclaringClassOffset().Int32Value(),
                     rLINK);
        // Null-check "this" [use rARG1]
        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
    } else if (state == 1) {
        // declaring_class_->super_class [use/set rLINK]
        loadWordDisp(cUnit, rLINK,
                     Class::SuperClassOffset().Int32Value(), rLINK);
    } else if (state == 2) {
        // super_class_->vtable [use/set rLINK]
        loadWordDisp(cUnit, rLINK,
                     Class::VTableOffset().Int32Value(), rLINK);
    } else if (state == 3) {
        // Target method = vtable[methodIdx] [use rLINK, set rARG0]
        loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
                     Array::DataOffset(sizeof(Object*)).Int32Value(),
                     rARG0);
    } else if (state == 4) {
        // Compiled code address of the target [use rARG0, set rLINK]
        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                     rLINK);
    } else {
        return -1;
    }
    return state + 1;
}
/*
 * Slow-path launch sequence, used when the base method is not fully
 * resolved at compile time and the call is routed through a runtime
 * helper. "trampoline" is the displacement from rSELF at which the
 * helper's entry point is stored. The whole sequence is a single step:
 * state 0 emits it and yields 1; every other state yields -1 and emits
 * nothing.
 */
int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
                     int state, uint32_t dexIdx, uint32_t methodIdx)
{
    if (state != 0) {
        return -1;
    }
    // rLINK = trampoline entry point
    loadWordDisp(cUnit, rSELF, trampoline, rLINK);
    // rARG0 = method index (dexIdx)
    loadConstant(cUnit, rARG0, dexIdx);
    return 1;
}
/*
 * Slow-path step for static invokes: defers to nextInvokeInsnSP with the
 * Thread offset of pInvokeStaticTrampolineWithAccessCheck. The incoming
 * methodIdx is not forwarded; 0 is passed instead.
 */
int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
                         int state, uint32_t dexIdx, uint32_t methodIdx)
{
    return nextInvokeInsnSP(
        cUnit, mir,
        OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck),
        state, dexIdx, 0);
}
/*
 * Slow-path step for direct invokes: defers to nextInvokeInsnSP with the
 * Thread offset of pInvokeDirectTrampolineWithAccessCheck. The incoming
 * methodIdx is not forwarded; 0 is passed instead.
 */
int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
                         uint32_t dexIdx, uint32_t methodIdx)
{
    return nextInvokeInsnSP(
        cUnit, mir,
        OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck),
        state, dexIdx, 0);
}
/*
 * Slow-path step for super invokes: defers to nextInvokeInsnSP with the
 * Thread offset of pInvokeSuperTrampolineWithAccessCheck. The incoming
 * methodIdx is not forwarded; 0 is passed instead.
 */
int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
                        uint32_t dexIdx, uint32_t methodIdx)
{
    return nextInvokeInsnSP(
        cUnit, mir,
        OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck),
        state, dexIdx, 0);
}
/*
 * Slow-path step for virtual invokes: defers to nextInvokeInsnSP with
 * the Thread offset of pInvokeVirtualTrampolineWithAccessCheck. The
 * incoming methodIdx is not forwarded; 0 is passed instead.
 */
int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
                    uint32_t dexIdx, uint32_t methodIdx)
{
    return nextInvokeInsnSP(
        cUnit, mir,
        OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck),
        state, dexIdx, 0);
}
/*
* All invoke-interface calls bounce off of art_invoke_interface_trampoline,
* which will locate the target and continue on via a tail call.
*/
int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state,
                          uint32_t dexIdx, uint32_t unused)
{
    // Same single-step sequence as the other trampoline variants, using
    // the Thread offset of pInvokeInterfaceTrampoline; the last index
    // argument handed to the helper is always 0.
    return nextInvokeInsnSP(
        cUnit, mir,
        OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline),
        state, dexIdx, 0);
}
/*
 * Interface-invoke launch step that goes through
 * pInvokeInterfaceTrampolineWithAccessCheck. Defers to nextInvokeInsnSP;
 * the last index argument handed to the helper is always 0.
 */
int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
                                         int state, uint32_t dexIdx,
                                         uint32_t unused)
{
    return nextInvokeInsnSP(
        cUnit, mir,
        OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck),
        state, dexIdx, 0);
}
/*
 * Load the leading invoke arguments into rARG1..rARG3, calling
 * nextCallInsn once after each argument has been loaded so the launch
 * sequence is interleaved with the loads.
 *
 * When skipThis is set, use 0 and rARG1 are both passed over and loading
 * starts with use 1 into rARG2. A wide argument is loaded as a register
 * pair only while its low half lands in rARG1 or rARG2; otherwise it is
 * treated as narrow and just one word is loaded.
 *
 * Returns the updated launch-sequence state.
 */
int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
                int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
                uint32_t methodIdx, bool skipThis)
{
    int argReg = rARG1;
    int useIdx = 0;
    if (skipThis) {
        ++argReg;
        ++useIdx;
    }

    while ((argReg <= rARG3) && (useIdx < mir->ssaRep->numUses)) {
        RegLocation rlArg = oatGetRawSrc(cUnit, mir, useIdx);
        ++useIdx;
        rlArg = oatUpdateRawLoc(cUnit, rlArg);

        const bool loadAsPair = rlArg.wide && (argReg <= rARG2);
        if (loadAsPair) {
            // Both halves fit: consumes two registers and two uses.
            loadValueDirectWideFixed(cUnit, rlArg, argReg, argReg + 1);
            argReg += 2;
            ++useIdx;
        } else {
            // Narrow value, or a wide one whose low half is in rARG3.
            rlArg.wide = false;
            loadValueDirectFixed(cUnit, rlArg, argReg);
            ++argReg;
        }
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    }
    return callState;
}
/*
* Load up to 5 arguments, the first three of which will be in
* rARG1 .. rARG3. On entry rARG0 contains the current method pointer,
* and as part of the load sequence, it must be replaced with
* the target method pointer. Note, this may also be called
* for "range" variants if the number of arguments is 5 or fewer.
*/
int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
                         DecodedInstruction* dInsn, int callState,
                         LIR** pcrLabel, NextCallInsn nextCallInsn,
                         uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
{
    /*
     * Parameters:
     *   dInsn        - decoded invoke; vA is the argument word count
     *                  (DCHECKed to be at most 5).
     *   callState    - current state of the interleaved launch sequence.
     *   pcrLabel     - if non-NULL, receives the result of the null check
     *                  on the first argument (rARG1) emitted at the end.
     *   nextCallInsn - emits the next launch instruction; invoked after
     *                  most argument operations.
     *   skipThis     - forwarded to loadArgRegs.
     * Returns the updated launch-sequence state.
     *
     * Argument words 3 and up are stored to [rSP + (index + 1) * 4]
     * first, then words 0..2 are loaded into rARG1..rARG3. The stores go
     * first because rARG2/rARG3 are used as temps while copying.
     */
    RegLocation rlArg;

    /* If no arguments, just return */
    if (dInsn->vA == 0)
        return callState;

    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);

    DCHECK_LE(dInsn->vA, 5U);
    if (dInsn->vA > 3) {
        uint32_t nextUse = 3;
        // Detect special case of wide arg spanning arg3/arg4
        RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
        RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
        RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
        if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) &&
            rlUse2.wide) {
            int reg;
            // Wide spans, we need the 2nd half of uses[2].
            rlArg = oatUpdateLocWide(cUnit, rlUse2);
            if (rlArg.location == kLocPhysReg) {
                reg = rlArg.highReg;
            } else {
                // rARG2 & rARG3 can safely be used here
                reg = rARG3;
                loadWordDisp(cUnit, rSP,
                             oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
                                         methodIdx);
            }
            // Store the high half to out slot (3 + 1) * 4 == 16. This
            // used to be emitted twice with identical operands; one
            // store is sufficient.
            storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
            nextUse++;
        }
        // Loop through the rest
        while (nextUse < dInsn->vA) {
            int lowReg;
            int highReg;
            rlArg = oatGetRawSrc(cUnit, mir, nextUse);
            rlArg = oatUpdateRawLoc(cUnit, rlArg);
            if (rlArg.location == kLocPhysReg) {
                // Already live in a register: store straight from there.
                lowReg = rlArg.lowReg;
                highReg = rlArg.highReg;
            } else {
                // Stage through rARG2/rARG3; loadArgRegs reloads them
                // with the real argument values afterwards.
                lowReg = rARG2;
                highReg = rARG3;
                if (rlArg.wide) {
                    loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
                } else {
                    loadValueDirectFixed(cUnit, rlArg, lowReg);
                }
                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
                                         methodIdx);
            }
            int outsOffset = (nextUse + 1) * 4;
            if (rlArg.wide) {
                storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
                nextUse += 2;
            } else {
                storeWordDisp(cUnit, rSP, outsOffset, lowReg);
                nextUse++;
            }
            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
        }
    }

    // Finally load the register-passed arguments.
    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
                            dexIdx, methodIdx, skipThis);

    if (pcrLabel) {
        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
    }
    return callState;
}
/*
* May have 0+ arguments (also used for jumbo). Note that
* source virtual registers may be in physical registers, so may
* need to be flushed to home location before copying. This
* applies to arg3 and above (see below).
*
* Two general strategies:
* If < 20 arguments
* Pass args 3-18 using vldm/vstm block copy
* Pass arg0, arg1 & arg2 in rARG1-rARG3
* If 20+ arguments
* Pass args arg3+ using memcpy block copy
* Pass arg0, arg1 & arg2 in rARG1-rARG3
*
*/
int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
DecodedInstruction* dInsn, int callState,
LIR** pcrLabel, NextCallInsn nextCallInsn,
uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
{
/*
 * Parameters:
 *   dInsn        - decoded invoke; vC is the first argument register,
 *                  vA the argument word count.
 *   callState    - current state of the interleaved launch sequence.
 *   pcrLabel     - if non-NULL, receives the result of the null check
 *                  on rARG1 emitted at the end.
 *   nextCallInsn - emits the next launch instruction.
 *   skipThis     - forwarded to loadArgRegs / genDalvikArgsNoRange.
 * Returns the updated launch-sequence state.
 */
int firstArg = dInsn->vC;
int numArgs = dInsn->vA;
// If we can treat it as non-range (Jumbo ops will use range form)
if (numArgs <= 5)
return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
nextCallInsn, dexIdx, methodIdx,
skipThis);
/*
 * Make sure the range list doesn't span the break between the normal
 * Dalvik vRegs and the ins. The block copy below needs the source
 * words to be laid out consecutively; a range crossing that boundary
 * is treated as fatal.
 */
int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
LOG(FATAL) << "Argument list spanned locals & args";
}
/*
 * First handle the non-register arguments. Both copy forms below read
 * the source arguments from their home frame locations, so scan the
 * sReg names and write back to the frame any value that currently
 * lives in a physical register.
 */
// Only argument words 3 and up are copied from memory. A wide value
// starting at word 2 has its high half at word 3, hence the ">= 2"
// test in the wide case versus ">= 3" in the narrow case.
for (int nextArg = 0; nextArg < numArgs;) {
RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
if (loc.wide) {
loc = oatUpdateLocWide(cUnit, loc);
if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
storeBaseDispWide(cUnit, rSP,
oatSRegOffset(cUnit, loc.sRegLow),
loc.lowReg, loc.highReg);
}
nextArg += 2;
} else {
loc = oatUpdateLoc(cUnit, loc);
if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
loc.lowReg, kWord);
}
nextArg++;
}
}
// Source of the block copy: frame offset of argument word 3.
int startOffset = oatSRegOffset(cUnit,
cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
// Destination: the outs area, past the Method* slot and the three
// words that correspond to the register-passed arguments.
int outsOffset = 4 /* Method* */ + (3 * 4);
#if defined(TARGET_MIPS)
// MIPS always copies via the pMemcpy runtime helper:
// rARG0 = destination, rARG1 = source, rARG2 = byte count.
// Generate memcpy
opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
callRuntimeHelper(cUnit, rTgt);
// Restore Method* (rARG0 was used as the memcpy destination argument)
loadCurrMethodDirect(cUnit, rARG0);
#else
if (numArgs >= 20) {
// Too many words for the vldm/vstm form below (capped at 16), so
// copy argument words 3 and up via the pMemcpy runtime helper.
// Generate memcpy
opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
callRuntimeHelper(cUnit, rTgt);
// Restore Method* (rARG0 was used as the memcpy destination argument)
loadCurrMethodDirect(cUnit, rARG0);
} else {
// Use vldm/vstm pair using rARG3 as a temp
// Here numArgs is 6..19, so regsLeft is 3..16 words.
int regsLeft = std::min(numArgs - 3, 16);
callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
//TUNING: loosen barrier
ld->defMask = ENCODE_ALL;
setMemRefType(ld, true /* isLoad */, kDalvikReg);
callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
// Same value as outsOffset above.
opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
setMemRefType(st, false /* isLoad */, kDalvikReg);
st->defMask = ENCODE_ALL;
callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
}
#endif
// Now load the register-passed arguments (rARG1..rARG3).
callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
dexIdx, methodIdx, skipThis);
callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
if (pcrLabel) {
*pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
}
return callState;
}
#endif // TARGET_ARM || TARGET_MIPS
} // namespace art