// src/compiler/codegen/GenInvoke.cc - platform/art - Git at Google

 /*
  * Copyright (C) 2012 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 namespace art {

 /*
  * This source file contains "gen" codegen routines that should
  * be applicable to most targets.  Only mid-level support utilities
  * and "op" calls may be used here.
  */


 /*
  * x86 targets will likely be different enough to need their own
  * invoke gen routines.
  */
 #if defined(TARGET_ARM) || defined (TARGET_MIPS)
 typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
                             uint32_t methodIdx);
 /*
  * Set up the incoming arguments ("ins") at method entry.
  *
  * The ins occupy the last numIns Dalvik virtual registers.  The first
  * (rARG3 - rARG1 + 1) of them arrive in rARG1..rARG3; each of those is
  * copied to its promoted core and/or fp register when the promotion map
  * gives it one, and is always stored to its frame slot too, in case the
  * value is only partially promoted.  The remaining ins arrive in the
  * frame and are loaded into their promoted register(s), if any.
  *
  * NOTE: an older version of dx could reuse static method argument
  * registers, so the same virtual register may be promoted to both a core
  * and an fp register.  Both copies are initialized in that case; it is
  * too uncommon to be worth optimizing.
  */
 void flushIns(CompilationUnit* cUnit)
 {
     if (cUnit->numIns == 0) {
         return;
     }
     // Count of ins that arrive in argument registers.
     const int regArgCount = rARG3 - rARG1 + 1;
     // Virtual register number of the first in.
     const int firstInVReg = cUnit->numDalvikRegisters - cUnit->numIns;

     for (int inIdx = 0; inIdx < cUnit->numIns; inIdx++) {
         const int vReg = firstInVReg + inIdx;
         PromotionMap promo = cUnit->promotionMap[vReg];
         if (inIdx >= regArgCount) {
             // Arrived in the frame: load into any promoted register(s).
             if (promo.coreLocation == kLocPhysReg) {
                 loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
                              promo.coreReg);
             }
             if (promo.fpLocation == kLocPhysReg) {
                 loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
                              promo.fpReg);
             }
             continue;
         }
         // Arrived in a register: initialize any promoted register(s).
         const int argReg = rARG1 + inIdx;
         if (promo.coreLocation == kLocPhysReg) {
             genRegCopy(cUnit, promo.coreReg, argReg);
         }
         if (promo.fpLocation == kLocPhysReg) {
             genRegCopy(cUnit, promo.fpReg, argReg);
         }
         // Keep a copy in memory as well, in case of partial promotion.
         storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg), argReg, kWord);
     }
 }

 /*
  * In lieu of a real scheduling pass, emit the next instruction of a
  * static or direct invoke launch sequence.  "state" selects the step to
  * emit; the return value is the state to pass on the following call, or
  * -1 when "state" is not one of the steps (nothing is emitted then).
  */
 int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
                    int state, uint32_t dexIdx, uint32_t unused)
 {
     if (state == 0) {
         // Current Method* -> rARG0
         loadCurrMethodDirect(cUnit, rARG0);
     } else if (state == 1) {
         // method->dex_cache_resolved_methods_ -> rARG0
         loadWordDisp(cUnit, rARG0,
                      Method::DexCacheResolvedMethodsOffset().Int32Value(),
                      rARG0);
     } else if (state == 2) {
         // Target Method*: entry dexIdx of that array -> rARG0
         loadWordDisp(cUnit, rARG0,
                      Array::DataOffset(sizeof(Object*)).Int32Value() + dexIdx * 4,
                      rARG0);
     } else if (state == 3) {
         // Code pointer of the target method -> rLINK
         loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                      rLINK);
     } else {
         return -1;
     }
     return state + 1;
 }

 /*
  * In lieu of a real scheduling pass, emit the next instruction of a
  * virtual invoke launch sequence.  This is the fast path, used when the
  * target virtual method is fully resolved at compile time.  rLINK serves
  * as a temp until the target code address is loaded into it.  Note that
  * the first argument ("this") is loaded into rARG1 here rather than by
  * the standard loadArgRegs.  Returns the next state, or -1 when "state"
  * is not one of the steps.
  */
 int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
                   int state, uint32_t dexIdx, uint32_t methodIdx)
 {
     if (state == 0) {
         // "this" -> rARG1
         RegLocation rlThis = oatGetSrc(cUnit, mir, 0);
         loadValueDirectFixed(cUnit, rlThis, rARG1);
     } else if (state == 1) {
         // Null-check "this" [use rARG1] ...
         genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
         // ... then this->klass_ -> rLINK
         loadWordDisp(cUnit, rARG1, Object::ClassOffset().Int32Value(),
                      rLINK);
     } else if (state == 2) {
         // this->klass_->vtable -> rLINK [use rLINK]
         loadWordDisp(cUnit, rLINK, Class::VTableOffset().Int32Value(),
                      rLINK);
     } else if (state == 3) {
         // vtable entry methodIdx (target method) -> rARG0 [use rLINK]
         loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
                      Array::DataOffset(sizeof(Object*)).Int32Value(),
                      rARG0);
     } else if (state == 4) {
         // Target compiled code address -> rLINK [use rARG0]
         loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                      rLINK);
     } else {
         return -1;
     }
     return state + 1;
 }

 /*
  * Interleaved launch code for INVOKE_SUPER; same stepping scheme as
  * nextVCallInsn.  This is the fast path, used when the target virtual
  * method is fully resolved at compile time.  It also assumes that the
  * check that the target method index falls within the size of the
  * super's vtable has already been done at compile time.  Returns the
  * next state, or -1 when "state" is not one of the steps.
  */
 int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
                       int state, uint32_t dexIdx, uint32_t methodIdx)
 {
     if (state == 0) {
         // Current Method* -> rARG0
         loadCurrMethodDirect(cUnit, rARG0);
         // "this" -> rARG1
         RegLocation rlThis = oatGetSrc(cUnit, mir, 0);
         loadValueDirectFixed(cUnit, rlThis, rARG1);
         // method->declaring_class_ -> rLINK [use rARG0]
         loadWordDisp(cUnit, rARG0,
                      Method::DeclaringClassOffset().Int32Value(),
                      rLINK);
         // Null-check "this" [use rARG1]
         genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
     } else if (state == 1) {
         // method->declaring_class_->super_class -> rLINK [use rLINK]
         loadWordDisp(cUnit, rLINK,
                      Class::SuperClassOffset().Int32Value(), rLINK);
     } else if (state == 2) {
         // ...->super_class_->vtable -> rLINK [use rLINK]
         loadWordDisp(cUnit, rLINK,
                      Class::VTableOffset().Int32Value(), rLINK);
     } else if (state == 3) {
         // vtable entry methodIdx (target method) -> rARG0 [use rLINK]
         loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
                      Array::DataOffset(sizeof(Object*)).Int32Value(),
                      rARG0);
     } else if (state == 4) {
         // Target compiled code address -> rLINK [use rARG0]
         loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                      rLINK);
     } else {
         return -1;
     }
     return state + 1;
 }

 /*
  * Shared launch sequence for invokes whose base method is not fully
  * resolved at compile time: bail to the runtime helper whose address is
  * stored at offset "trampoline" from rSELF.  The sequence has a single
  * step (state 0, which returns 1); any other state emits nothing and
  * returns -1.  methodIdx is unused.
  */
 int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
                      int state, uint32_t dexIdx, uint32_t methodIdx)
 {
     if (state != 0) {
         return -1;
     }
     // Trampoline target -> rLINK
     loadWordDisp(cUnit, rSELF, trampoline, rLINK);
     // Method index -> rARG0
     loadConstant(cUnit, rARG0, dexIdx);
     return 1;
 }

 // Slow-path launch step for a static invoke: defers to nextInvokeInsnSP
 // with the Thread::pInvokeStaticTrampolineWithAccessCheck entry.
 int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
                          int state, uint32_t dexIdx, uint32_t methodIdx)
 {
     const int helperOffset =
         OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck);
     return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
 }

 // Slow-path launch step for a direct invoke: defers to nextInvokeInsnSP
 // with the Thread::pInvokeDirectTrampolineWithAccessCheck entry.
 int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
                          uint32_t dexIdx, uint32_t methodIdx)
 {
     const int helperOffset =
         OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck);
     return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
 }

 // Slow-path launch step for a super invoke: defers to nextInvokeInsnSP
 // with the Thread::pInvokeSuperTrampolineWithAccessCheck entry.
 int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
                         uint32_t dexIdx, uint32_t methodIdx)
 {
     const int helperOffset =
         OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck);
     return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
 }

 // Slow-path launch step for a virtual invoke: defers to nextInvokeInsnSP
 // with the Thread::pInvokeVirtualTrampolineWithAccessCheck entry.
 int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
                     uint32_t dexIdx, uint32_t methodIdx)
 {
     const int helperOffset =
         OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck);
     return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
 }

 /*
  * All invoke-interface calls bounce off of art_invoke_interface_trampoline,
  * which will locate the target and continue on via a tail call.  The launch
  * step itself is emitted by nextInvokeInsnSP using the
  * Thread::pInvokeInterfaceTrampoline entry.
  */
 int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state,
                           uint32_t dexIdx, uint32_t unused)
 {
     const int helperOffset =
         OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline);
     return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
 }

 // Launch step for an interface invoke that goes through the
 // Thread::pInvokeInterfaceTrampolineWithAccessCheck entry; the step
 // itself is emitted by nextInvokeInsnSP.
 int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
                                          int state, uint32_t dexIdx,
                                          uint32_t unused)
 {
     const int helperOffset =
         OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck);
     return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
 }

 /*
  * Load outgoing arguments into the argument registers.  Loading starts
  * with use 0 in rARG1 (or, when skipThis is set, with use 1 in the
  * register after rARG1) and stops once rARG3 has been passed or the uses
  * run out.  A wide value is loaded as a register pair only when its low
  * half lands in rARG2 or lower; otherwise a single word is loaded.  One
  * launch-sequence step is emitted through nextCallInsn after each load.
  * dInsn is not used.  Returns the updated call state.
  */
 int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
                 int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
                 uint32_t methodIdx, bool skipThis)
 {
     const int skipped = skipThis ? 1 : 0;
     int reg = rARG1 + skipped;
     int use = skipped;
     while ((reg <= rARG3) && (use < mir->ssaRep->numUses)) {
         RegLocation loc = oatGetRawSrc(cUnit, mir, use);
         use++;
         loc = oatUpdateRawLoc(cUnit, loc);
         if (loc.wide && (reg <= rARG2)) {
             // Both halves fit: consume two registers and two uses.
             loadValueDirectWideFixed(cUnit, loc, reg, reg + 1);
             reg += 2;
             use++;
         } else {
             // Single word (or only the low half of a wide fits).
             loc.wide = false;
             loadValueDirectFixed(cUnit, loc, reg);
             reg++;
         }
         callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
     }
     return callState;
 }

 /*
  * Load up to 5 argument words for a non-range invoke (this may also be
  * called for "range" variants if the number of arguments is 5 or fewer).
  * Words 3 and up are stored to the outs area of the frame, at
  * rSP + (index + 1) * 4; the first three words are then loaded into
  * rARG1 .. rARG3 by loadArgRegs.  On entry rARG0 contains the current
  * method pointer, and as part of the load sequence it must be replaced
  * with the target method pointer; the interleaved nextCallInsn steps do
  * that work.  If pcrLabel is non-null it receives the result of a null
  * check on rARG1.  Returns the updated call state.
  */
 int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
                          DecodedInstruction* dInsn, int callState,
                          LIR** pcrLabel, NextCallInsn nextCallInsn,
                          uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
 {
     RegLocation rlArg;

     /* If no arguments, just return */
     if (dInsn->vA == 0)
         return callState;

     callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);

     DCHECK_LE(dInsn->vA, 5U);
     if (dInsn->vA > 3) {
         uint32_t nextUse = 3;
         // Detect the special case of a wide value that starts at use 2:
         // its high half is word 3, which belongs in the outs area.
         RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
         RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
         RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
         if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) &&
             rlUse2.wide) {
             int reg;
             // Wide spans, we need the 2nd half of uses[2].
             rlArg = oatUpdateLocWide(cUnit, rlUse2);
             if (rlArg.location == kLocPhysReg) {
                 reg = rlArg.highReg;
             } else {
                 // rARG2 & rARG3 can safely be used here
                 reg = rARG3;
                 loadWordDisp(cUnit, rSP,
                              oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
                 callState = nextCallInsn(cUnit, mir, callState, dexIdx,
                                          methodIdx);
             }
             // Store the high half to the out slot of word 3 (offset 16).
             // This store used to be emitted twice, once with the offset
             // computed and once with the literal 16; one store suffices.
             storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
             callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
             nextUse++;
         }
         // Loop through the rest
         while (nextUse < dInsn->vA) {
             int lowReg;
             int highReg;
             rlArg = oatGetRawSrc(cUnit, mir, nextUse);
             rlArg = oatUpdateRawLoc(cUnit, rlArg);
             if (rlArg.location == kLocPhysReg) {
                 // Already in a register: store straight from there.
                 lowReg = rlArg.lowReg;
                 highReg = rlArg.highReg;
             } else {
                 // Stage through rARG2/rARG3; loadArgRegs reloads them later.
                 lowReg = rARG2;
                 highReg = rARG3;
                 if (rlArg.wide) {
                     loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
                 } else {
                     loadValueDirectFixed(cUnit, rlArg, lowReg);
                 }
                 callState = nextCallInsn(cUnit, mir, callState, dexIdx,
                                          methodIdx);
             }
             int outsOffset = (nextUse + 1) * 4;
             if (rlArg.wide) {
                 storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
                 nextUse += 2;
             } else {
                 storeWordDisp(cUnit, rSP, outsOffset, lowReg);
                 nextUse++;
             }
             callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
         }
     }

     callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
                             dexIdx, methodIdx, skipThis);

     if (pcrLabel) {
         *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
     }
     return callState;
 }

 /*
  * May have 0+ arguments (also used for jumbo).  Note that
  * source virtual registers may be in physical registers, so may
  * need to be flushed to home location before copying.  This
  * applies to arg3 and above (see below).
  *
  * Two general strategies:
  *    If < 20 arguments
  *       Pass args 3-18 using vldm/vstm block copy
  *       Pass arg0, arg1 & arg2 in rARG1-rARG3
  *    If 20+ arguments
  *       Pass args arg19+ using memcpy block copy
  *       Pass arg0, arg1 & arg2 in rARG1-rARG3
  *
  */
 int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
                        DecodedInstruction* dInsn, int callState,
                        LIR** pcrLabel, NextCallInsn nextCallInsn,
                        uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
 {
     int firstArg = dInsn->vC;
     int numArgs = dInsn->vA;

     // If we can treat it as non-range (Jumbo ops will use range form)
     if (numArgs <= 5)
         return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
                                     nextCallInsn, dexIdx, methodIdx,
                                     skipThis);
     /*
      * Make sure range list doesn't span the break between in normal
      * Dalvik vRegs and the ins.
      */
     int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
     int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
     if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
         LOG(FATAL) << "Argument list spanned locals & args";
     }

     /*
      * First load the non-register arguments.  Both forms expect all
      * of the source arguments to be in their home frame location, so
      * scan the sReg names and flush any that have been promoted to
      * frame backing storage.
      */
     // Scan the rest of the args - if in physReg flush to memory
     for (int nextArg = 0; nextArg < numArgs;) {
         RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
         if (loc.wide) {
             loc = oatUpdateLocWide(cUnit, loc);
             if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
                 storeBaseDispWide(cUnit, rSP,
                                   oatSRegOffset(cUnit, loc.sRegLow),
                                   loc.lowReg, loc.highReg);
             }
             nextArg += 2;
         } else {
             loc = oatUpdateLoc(cUnit, loc);
             if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
                 storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
                               loc.lowReg, kWord);
             }
             nextArg++;
         }
     }

     int startOffset = oatSRegOffset(cUnit,
         cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
     int outsOffset = 4 /* Method* */ + (3 * 4);
 #if defined(TARGET_MIPS)
     // Generate memcpy
     opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
     opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
     int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
     loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
     callRuntimeHelper(cUnit, rTgt);
     // Restore Method*
     loadCurrMethodDirect(cUnit, rARG0);
 #else
     if (numArgs >= 20) {
         // Generate memcpy
         opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
         opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
         int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
         loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
         callRuntimeHelper(cUnit, rTgt);
         // Restore Method*
         loadCurrMethodDirect(cUnit, rARG0);
     } else {
         // Use vldm/vstm pair using rARG3 as a temp
         int regsLeft = std::min(numArgs - 3, 16);
         callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
         opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
         LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
         //TUNING: loosen barrier
         ld->defMask = ENCODE_ALL;
         setMemRefType(ld, true /* isLoad */, kDalvikReg);
         callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
         opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
         callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
         LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
         setMemRefType(st, false /* isLoad */, kDalvikReg);
         st->defMask = ENCODE_ALL;
         callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
     }
 #endif

     callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
                             dexIdx, methodIdx, skipThis);

     callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
     if (pcrLabel) {
         *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
     }
     return callState;
 }

 #endif  // TARGET_ARM || TARGET_MIPS


 }  // namespace art
	/*
	* Copyright (C) 2012 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	namespace art {

	/*
	* This source file contains "gen" codegen routines that should
	* be applicable to most targets. Only mid-level support utilities
	* and "op" calls may be used here.
	*/


	/*
	* x86 targets will likely be different enough to need their own
	* invoke gen routines.
	*/
	#if defined(TARGET_ARM) || defined (TARGET_MIPS)
	// Common signature of the "next call instruction" emitters in this file:
	// given the compilation unit, the invoke MIR, the current sequence state
	// and the two method indices, emit one step and return the next state.
	typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
	                            uint32_t methodIdx);
	/*
	 * Set up the incoming arguments ("ins") at method entry.
	 *
	 * The ins occupy the last numIns Dalvik virtual registers.  The first
	 * (rARG3 - rARG1 + 1) of them arrive in rARG1..rARG3; each of those is
	 * copied to its promoted core and/or fp register when the promotion map
	 * gives it one, and is always stored to its frame slot too, in case the
	 * value is only partially promoted.  The remaining ins arrive in the
	 * frame and are loaded into their promoted register(s), if any.
	 *
	 * NOTE: an older version of dx could reuse static method argument
	 * registers, so the same virtual register may be promoted to both a core
	 * and an fp register.  Both copies are initialized in that case; it is
	 * too uncommon to be worth optimizing.
	 */
	void flushIns(CompilationUnit* cUnit)
	{
	    if (cUnit->numIns == 0) {
	        return;
	    }
	    // Count of ins that arrive in argument registers.
	    const int regArgCount = rARG3 - rARG1 + 1;
	    // Virtual register number of the first in.
	    const int firstInVReg = cUnit->numDalvikRegisters - cUnit->numIns;

	    for (int inIdx = 0; inIdx < cUnit->numIns; inIdx++) {
	        const int vReg = firstInVReg + inIdx;
	        PromotionMap promo = cUnit->promotionMap[vReg];
	        if (inIdx >= regArgCount) {
	            // Arrived in the frame: load into any promoted register(s).
	            if (promo.coreLocation == kLocPhysReg) {
	                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
	                             promo.coreReg);
	            }
	            if (promo.fpLocation == kLocPhysReg) {
	                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
	                             promo.fpReg);
	            }
	            continue;
	        }
	        // Arrived in a register: initialize any promoted register(s).
	        const int argReg = rARG1 + inIdx;
	        if (promo.coreLocation == kLocPhysReg) {
	            genRegCopy(cUnit, promo.coreReg, argReg);
	        }
	        if (promo.fpLocation == kLocPhysReg) {
	            genRegCopy(cUnit, promo.fpReg, argReg);
	        }
	        // Keep a copy in memory as well, in case of partial promotion.
	        storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg), argReg, kWord);
	    }
	}

	/*
	 * In lieu of a real scheduling pass, emit the next instruction of a
	 * static or direct invoke launch sequence.  "state" selects the step to
	 * emit; the return value is the state to pass on the following call, or
	 * -1 when "state" is not one of the steps (nothing is emitted then).
	 */
	int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
	                   int state, uint32_t dexIdx, uint32_t unused)
	{
	    switch(state) {
	        case 0:  // Get the current Method* [sets rARG0]
	            loadCurrMethodDirect(cUnit, rARG0);
	            break;
	        case 1:  // Get method->dex_cache_resolved_methods_
	            loadWordDisp(cUnit, rARG0,
	                Method::DexCacheResolvedMethodsOffset().Int32Value(),
	                rARG0);
	            break;
	        case 2:  // Grab target method*: entry dexIdx of that array
	            // Restored from the garbled "sizeof(Object) ... dexIdx 4":
	            // entries are Object* sized and indexed by dexIdx * 4.
	            loadWordDisp(cUnit, rARG0,
	                Array::DataOffset(sizeof(Object*)).Int32Value() + dexIdx * 4,
	                rARG0);
	            break;
	        case 3:  // Grab the code from the method* [sets rLINK]
	            loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
	                         rLINK);
	            break;
	        default:
	            return -1;
	    }
	    return state + 1;
	}

	/*
	 * In lieu of a real scheduling pass, emit the next instruction of a
	 * virtual invoke launch sequence.  This is the fast path, used when the
	 * target virtual method is fully resolved at compile time.  rLINK serves
	 * as a temp until the target code address is loaded into it.  Note that
	 * the first argument ("this") is loaded into rARG1 here rather than by
	 * the standard loadArgRegs.  Returns the next state, or -1 when "state"
	 * is not one of the steps.
	 */
	int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
	                  int state, uint32_t dexIdx, uint32_t methodIdx)
	{
	    if (state == 0) {
	        // "this" -> rARG1
	        RegLocation rlThis = oatGetSrc(cUnit, mir, 0);
	        loadValueDirectFixed(cUnit, rlThis, rARG1);
	    } else if (state == 1) {
	        // Null-check "this" [use rARG1] ...
	        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
	        // ... then this->klass_ -> rLINK
	        loadWordDisp(cUnit, rARG1, Object::ClassOffset().Int32Value(),
	                     rLINK);
	    } else if (state == 2) {
	        // this->klass_->vtable -> rLINK [use rLINK]
	        loadWordDisp(cUnit, rLINK, Class::VTableOffset().Int32Value(),
	                     rLINK);
	    } else if (state == 3) {
	        // vtable entry methodIdx (target method) -> rARG0 [use rLINK]
	        loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
	                     Array::DataOffset(sizeof(Object*)).Int32Value(),
	                     rARG0);
	    } else if (state == 4) {
	        // Target compiled code address -> rLINK [use rARG0]
	        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
	                     rLINK);
	    } else {
	        return -1;
	    }
	    return state + 1;
	}

	/*
	 * Interleaved launch code for INVOKE_SUPER; same stepping scheme as
	 * nextVCallInsn.  This is the fast path, used when the target virtual
	 * method is fully resolved at compile time.  It also assumes that the
	 * check that the target method index falls within the size of the
	 * super's vtable has already been done at compile time.  Returns the
	 * next state, or -1 when "state" is not one of the steps.
	 */
	int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
	                      int state, uint32_t dexIdx, uint32_t methodIdx)
	{
	    if (state == 0) {
	        // Current Method* -> rARG0
	        loadCurrMethodDirect(cUnit, rARG0);
	        // "this" -> rARG1
	        RegLocation rlThis = oatGetSrc(cUnit, mir, 0);
	        loadValueDirectFixed(cUnit, rlThis, rARG1);
	        // method->declaring_class_ -> rLINK [use rARG0]
	        loadWordDisp(cUnit, rARG0,
	                     Method::DeclaringClassOffset().Int32Value(),
	                     rLINK);
	        // Null-check "this" [use rARG1]
	        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
	    } else if (state == 1) {
	        // method->declaring_class_->super_class -> rLINK [use rLINK]
	        loadWordDisp(cUnit, rLINK,
	                     Class::SuperClassOffset().Int32Value(), rLINK);
	    } else if (state == 2) {
	        // ...->super_class_->vtable -> rLINK [use rLINK]
	        loadWordDisp(cUnit, rLINK,
	                     Class::VTableOffset().Int32Value(), rLINK);
	    } else if (state == 3) {
	        // vtable entry methodIdx (target method) -> rARG0 [use rLINK]
	        loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
	                     Array::DataOffset(sizeof(Object*)).Int32Value(),
	                     rARG0);
	    } else if (state == 4) {
	        // Target compiled code address -> rLINK [use rARG0]
	        loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
	                     rLINK);
	    } else {
	        return -1;
	    }
	    return state + 1;
	}

	/*
	 * Shared launch sequence for invokes whose base method is not fully
	 * resolved at compile time: bail to the runtime helper whose address is
	 * stored at offset "trampoline" from rSELF.  The sequence has a single
	 * step (state 0, which returns 1); any other state emits nothing and
	 * returns -1.  methodIdx is unused.
	 */
	int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
	                     int state, uint32_t dexIdx, uint32_t methodIdx)
	{
	    if (state != 0) {
	        return -1;
	    }
	    // Trampoline target -> rLINK
	    loadWordDisp(cUnit, rSELF, trampoline, rLINK);
	    // Method index -> rARG0
	    loadConstant(cUnit, rARG0, dexIdx);
	    return 1;
	}

	// Slow-path launch step for a static invoke: defers to nextInvokeInsnSP
	// with the Thread::pInvokeStaticTrampolineWithAccessCheck entry.
	int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
	                         int state, uint32_t dexIdx, uint32_t methodIdx)
	{
	    const int helperOffset =
	        OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck);
	    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
	}

	// Slow-path launch step for a direct invoke: defers to nextInvokeInsnSP
	// with the Thread::pInvokeDirectTrampolineWithAccessCheck entry.
	int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
	                         uint32_t dexIdx, uint32_t methodIdx)
	{
	    const int helperOffset =
	        OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck);
	    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
	}

	// Slow-path launch step for a super invoke: defers to nextInvokeInsnSP
	// with the Thread::pInvokeSuperTrampolineWithAccessCheck entry.
	int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
	                        uint32_t dexIdx, uint32_t methodIdx)
	{
	    const int helperOffset =
	        OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck);
	    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
	}

	// Slow-path launch step for a virtual invoke: defers to nextInvokeInsnSP
	// with the Thread::pInvokeVirtualTrampolineWithAccessCheck entry.
	int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
	                    uint32_t dexIdx, uint32_t methodIdx)
	{
	    const int helperOffset =
	        OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck);
	    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
	}

	/*
	 * All invoke-interface calls bounce off of art_invoke_interface_trampoline,
	 * which will locate the target and continue on via a tail call.  The launch
	 * step itself is emitted by nextInvokeInsnSP using the
	 * Thread::pInvokeInterfaceTrampoline entry.
	 */
	int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state,
	                          uint32_t dexIdx, uint32_t unused)
	{
	    const int helperOffset =
	        OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline);
	    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
	}

	// Launch step for an interface invoke that goes through the
	// Thread::pInvokeInterfaceTrampolineWithAccessCheck entry; the step
	// itself is emitted by nextInvokeInsnSP.
	int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
	                                         int state, uint32_t dexIdx,
	                                         uint32_t unused)
	{
	    const int helperOffset =
	        OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck);
	    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
	}

	/*
	 * Load outgoing arguments into the argument registers.  Loading starts
	 * with use 0 in rARG1 (or, when skipThis is set, with use 1 in the
	 * register after rARG1) and stops once rARG3 has been passed or the uses
	 * run out.  A wide value is loaded as a register pair only when its low
	 * half lands in rARG2 or lower; otherwise a single word is loaded.  One
	 * launch-sequence step is emitted through nextCallInsn after each load.
	 * dInsn is not used.  Returns the updated call state.
	 */
	int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
	                int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
	                uint32_t methodIdx, bool skipThis)
	{
	    const int skipped = skipThis ? 1 : 0;
	    int reg = rARG1 + skipped;
	    int use = skipped;
	    while ((reg <= rARG3) && (use < mir->ssaRep->numUses)) {
	        RegLocation loc = oatGetRawSrc(cUnit, mir, use);
	        use++;
	        loc = oatUpdateRawLoc(cUnit, loc);
	        if (loc.wide && (reg <= rARG2)) {
	            // Both halves fit: consume two registers and two uses.
	            loadValueDirectWideFixed(cUnit, loc, reg, reg + 1);
	            reg += 2;
	            use++;
	        } else {
	            // Single word (or only the low half of a wide fits).
	            loc.wide = false;
	            loadValueDirectFixed(cUnit, loc, reg);
	            reg++;
	        }
	        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	    }
	    return callState;
	}

	/*
	 * Load up to 5 argument words for a non-range invoke (this may also be
	 * called for "range" variants if the number of arguments is 5 or fewer).
	 * Words 3 and up are stored to the outs area of the frame, at
	 * rSP + (index + 1) * 4; the first three words are then loaded into
	 * rARG1 .. rARG3 by loadArgRegs.  On entry rARG0 contains the current
	 * method pointer, and as part of the load sequence it must be replaced
	 * with the target method pointer; the interleaved nextCallInsn steps do
	 * that work.  If pcrLabel is non-null it receives the result of a null
	 * check on rARG1.  Returns the updated call state.
	 */
	int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
	                         DecodedInstruction* dInsn, int callState,
	                         LIR** pcrLabel, NextCallInsn nextCallInsn,
	                         uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
	{
	    RegLocation rlArg;

	    /* If no arguments, just return */
	    if (dInsn->vA == 0)
	        return callState;

	    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);

	    DCHECK_LE(dInsn->vA, 5U);
	    if (dInsn->vA > 3) {
	        uint32_t nextUse = 3;
	        // Detect the special case of a wide value that starts at use 2:
	        // its high half is word 3, which belongs in the outs area.
	        RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
	        RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
	        RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
	        if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) &&
	            rlUse2.wide) {
	            int reg;
	            // Wide spans, we need the 2nd half of uses[2].
	            rlArg = oatUpdateLocWide(cUnit, rlUse2);
	            if (rlArg.location == kLocPhysReg) {
	                reg = rlArg.highReg;
	            } else {
	                // rARG2 & rARG3 can safely be used here
	                reg = rARG3;
	                loadWordDisp(cUnit, rSP,
	                             oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
	                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
	                                         methodIdx);
	            }
	            // Store the high half to the out slot of word 3 (offset 16).
	            // This store used to be emitted twice, once with the offset
	            // computed and once with the literal 16; one store suffices.
	            storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
	            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	            nextUse++;
	        }
	        // Loop through the rest
	        while (nextUse < dInsn->vA) {
	            int lowReg;
	            int highReg;
	            rlArg = oatGetRawSrc(cUnit, mir, nextUse);
	            rlArg = oatUpdateRawLoc(cUnit, rlArg);
	            if (rlArg.location == kLocPhysReg) {
	                // Already in a register: store straight from there.
	                lowReg = rlArg.lowReg;
	                highReg = rlArg.highReg;
	            } else {
	                // Stage through rARG2/rARG3; loadArgRegs reloads them later.
	                lowReg = rARG2;
	                highReg = rARG3;
	                if (rlArg.wide) {
	                    loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
	                } else {
	                    loadValueDirectFixed(cUnit, rlArg, lowReg);
	                }
	                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
	                                         methodIdx);
	            }
	            int outsOffset = (nextUse + 1) * 4;
	            if (rlArg.wide) {
	                storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
	                nextUse += 2;
	            } else {
	                storeWordDisp(cUnit, rSP, outsOffset, lowReg);
	                nextUse++;
	            }
	            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	        }
	    }

	    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
	                            dexIdx, methodIdx, skipThis);

	    if (pcrLabel) {
	        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
	    }
	    return callState;
	}

	/*
	* May have 0+ arguments (also used for jumbo). Note that
	* source virtual registers may be in physical registers, so they may
	* need to be flushed to their home location before copying. This
	* applies to arg3 and above (see below).
	*
	* Calls with 5 or fewer arguments are handed to genDalvikArgsNoRange.
	*
	* Two general strategies (ARM; MIPS always uses the memcpy form):
	*   If < 20 arguments
	*     Pass args 3-18 using vldm/vstm block copy
	*     Pass arg0, arg1 & arg2 in rARG1-rARG3
	*   If 20+ arguments
	*     Pass args arg3+ using memcpy block copy
	*     Pass arg0, arg1 & arg2 in rARG1-rARG3
	*
	*/
	int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
	                       DecodedInstruction* dInsn, int callState,
	                       LIR** pcrLabel, NextCallInsn nextCallInsn,
	                       uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
	{
	    /*
	     * Set up the outgoing arguments for a range-form invoke.
	     *
	     * dInsn->vC is the first argument register and dInsn->vA the argument
	     * count.  Argument words 3 and above are block-copied from their frame
	     * home locations into the outs area; the remaining words are handed to
	     * loadArgRegs().  If pcrLabel is non-NULL, *pcrLabel receives the
	     * result of a null check generated on rARG1.  Returns the final
	     * callState.
	     */
	    int firstArg = dInsn->vC;
	    int numArgs = dInsn->vA;

	    // If we can treat it as non-range (Jumbo ops will use range form)
	    if (numArgs <= 5)
	        return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
	                                    nextCallInsn, dexIdx, methodIdx,
	                                    skipThis);
	    /*
	     * Make sure range list doesn't span the break between in normal
	     * Dalvik vRegs and the ins.
	     */
	    int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
	    int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
	    if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
	        LOG(FATAL) << "Argument list spanned locals & args";
	    }

	    /*
	     * First load the non-register arguments.  Both forms expect all
	     * of the source arguments to be in their home frame location, so
	     * scan the sReg names and flush any that have been promoted to
	     * frame backing storage.
	     */
	    // Scan the rest of the args - if in physReg flush to memory
	    for (int nextArg = 0; nextArg < numArgs;) {
	        RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
	        if (loc.wide) {
	            loc = oatUpdateLocWide(cUnit, loc);
	            // A wide value starting at word 2 also covers word 3, so it
	            // must be flushed as well.
	            if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
	                storeBaseDispWide(cUnit, rSP,
	                                  oatSRegOffset(cUnit, loc.sRegLow),
	                                  loc.lowReg, loc.highReg);
	            }
	            nextArg += 2;
	        } else {
	            loc = oatUpdateLoc(cUnit, loc);
	            if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
	                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
	                              loc.lowReg, kWord);
	            }
	            nextArg++;
	        }
	    }

	    // Source: frame home of argument word 3.  Destination: outs area just
	    // past the Method* slot and the three register-passed argument words.
	    int startOffset = oatSRegOffset(cUnit,
	        cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
	    int outsOffset = 4 /* Method* */ + (3 * 4);
	#if defined(TARGET_MIPS)
	    // Generate memcpy
	    opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
	    opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
	    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
	    loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
	    callRuntimeHelper(cUnit, rTgt);
	    // Restore Method*
	    loadCurrMethodDirect(cUnit, rARG0);
	#else
	    if (numArgs >= 20) {
	        // Generate memcpy
	        opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
	        opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
	        int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
	        loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
	        callRuntimeHelper(cUnit, rTgt);
	        // Restore Method*
	        loadCurrMethodDirect(cUnit, rARG0);
	    } else {
	        // Use vldm/vstm pair using rARG3 as a temp
	        int regsLeft = std::min(numArgs - 3, 16);
	        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
	        LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
	        //TUNING: loosen barrier
	        ld->defMask = ENCODE_ALL;
	        setMemRefType(ld, true /* isLoad */, kDalvikReg);
	        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	        // Same destination as the memcpy form: reuse outsOffset rather
	        // than re-spelling the constant.
	        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, outsOffset);
	        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	        LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
	        setMemRefType(st, false /* isLoad */, kDalvikReg);
	        st->defMask = ENCODE_ALL;
	        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	    }
	#endif

	    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
	                            dexIdx, methodIdx, skipThis);

	    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
	    if (pcrLabel) {
	        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir, 0), rARG1, mir);
	    }
	    return callState;
	}

	#endif  // TARGET_ARM || TARGET_MIPS


	} // namespace art