Fast path interface dispatch.

Interface dispatch currently takes the slow path even when the method
we're dispatching against is known. This change makes the load of the
interface method either a load of a constant or a load from the resolved
methods table. It also makes the null check on the "this" pointer inline.

Change-Id: I69571a062d3d693bee2dec6e46a456e0f74411cd
diff --git a/src/common_throws.cc b/src/common_throws.cc
index 9fb686a..84ce565 100644
--- a/src/common_throws.cc
+++ b/src/common_throws.cc
@@ -82,6 +82,10 @@
     case Instruction::INVOKE_VIRTUAL_RANGE:
       ThrowNullPointerExceptionForMethodAccess(throw_method, dec_insn.vB, kVirtual);
       break;
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+      ThrowNullPointerExceptionForMethodAccess(throw_method, dec_insn.vB, kInterface);
+      break;
     case Instruction::IGET:
     case Instruction::IGET_WIDE:
     case Instruction::IGET_OBJECT:
diff --git a/src/compiler.cc b/src/compiler.cc
index c69c4ff..97a35f0 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -820,9 +820,13 @@
 
 void Compiler::GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type, AbstractMethod* method,
                                              uintptr_t& direct_code, uintptr_t& direct_method) {
+  // For direct and static methods compute possible direct_code and direct_method values, ie
+  // an address for the Method* being invoked and an address of the code for that Method*.
+  // For interface calls compute a value for direct_method that is the interface method being
+  // invoked, so this can be passed to the out-of-line runtime support code.
   direct_code = 0;
   direct_method = 0;
-  if (sharp_type != kStatic && sharp_type != kDirect) {
+  if (sharp_type != kStatic && sharp_type != kDirect && sharp_type != kInterface) {
     return;
   }
   bool method_code_in_boot = method->GetDeclaringClass()->GetClassLoader() == NULL;
@@ -885,8 +889,7 @@
                                               referrer_class);
       }
       if (referrer_class->CanAccess(methods_class) &&
-          referrer_class->CanAccessMember(methods_class,
-                                          resolved_method->GetAccessFlags())) {
+          referrer_class->CanAccessMember(methods_class, resolved_method->GetAccessFlags())) {
         vtable_idx = resolved_method->GetMethodIndex();
         const bool kEnableSharpening = true;
         // Sharpen a virtual call into a direct call when the target is known.
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index ccc2a83..cf06c80 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -650,38 +650,35 @@
   }
   // Push code and method literals, record offsets for the compiler to patch.
   dataLIR = cUnit->codeLiteralList;
-  if (dataLIR != NULL) {
-    while (dataLIR != NULL) {
-      uint32_t target = dataLIR->operands[0];
-      cUnit->compiler->AddCodePatch(cUnit->dex_file,
+  while (dataLIR != NULL) {
+    uint32_t target = dataLIR->operands[0];
+    cUnit->compiler->AddCodePatch(cUnit->dex_file,
+                                  cUnit->method_idx,
+                                  cUnit->invoke_type,
+                                  target,
+                                  static_cast<InvokeType>(dataLIR->operands[1]),
+                                  cUnit->codeBuffer.size());
+    const DexFile::MethodId& id = cUnit->dex_file->GetMethodId(target);
+    // unique based on target to ensure code deduplication works
+    uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id);
+    pushWord(cUnit->codeBuffer, unique_patch_value);
+    dataLIR = NEXT_LIR(dataLIR);
+  }
+  dataLIR = cUnit->methodLiteralList;
+  while (dataLIR != NULL) {
+    uint32_t target = dataLIR->operands[0];
+    cUnit->compiler->AddMethodPatch(cUnit->dex_file,
                                     cUnit->method_idx,
                                     cUnit->invoke_type,
                                     target,
                                     static_cast<InvokeType>(dataLIR->operands[1]),
                                     cUnit->codeBuffer.size());
-      const DexFile::MethodId& id = cUnit->dex_file->GetMethodId(target);
-      // unique based on target to ensure code deduplication works
-      uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id);
-      pushWord(cUnit->codeBuffer, unique_patch_value);
-      dataLIR = NEXT_LIR(dataLIR);
-    }
-    dataLIR = cUnit->methodLiteralList;
-    while (dataLIR != NULL) {
-      uint32_t target = dataLIR->operands[0];
-      cUnit->compiler->AddMethodPatch(cUnit->dex_file,
-                                      cUnit->method_idx,
-                                      cUnit->invoke_type,
-                                      target,
-                                      static_cast<InvokeType>(dataLIR->operands[1]),
-                                      cUnit->codeBuffer.size());
-      const DexFile::MethodId& id = cUnit->dex_file->GetMethodId(target);
-      // unique based on target to ensure code deduplication works
-      uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id);
-      pushWord(cUnit->codeBuffer, unique_patch_value);
-      dataLIR = NEXT_LIR(dataLIR);
-    }
+    const DexFile::MethodId& id = cUnit->dex_file->GetMethodId(target);
+    // unique based on target to ensure code deduplication works
+    uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id);
+    pushWord(cUnit->codeBuffer, unique_patch_value);
+    dataLIR = NEXT_LIR(dataLIR);
   }
-
 }
 
 /* Write the switch tables to the output stream */
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
index 3cc7c93..0208a4a 100644
--- a/src/compiler/codegen/GenInvoke.cc
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -188,7 +188,7 @@
   } else {
     switch (state) {
     case 0:  // Get the current Method* [sets rARG0]
-      // TUNING: we can save a reg copy if Method* has been promoted
+      // TUNING: we can save a reg copy if Method* has been promoted.
       loadCurrMethodDirect(cUnit, rARG0);
       break;
     case 1:  // Get method->dex_cache_resolved_methods_
@@ -247,16 +247,16 @@
                   int state, uint32_t dexIdx, uint32_t methodIdx,
                   uintptr_t unused, uintptr_t unused2, InvokeType unused3)
 {
-  RegLocation rlArg;
   /*
    * This is the fast path in which the target virtual method is
    * fully resolved at compile time.
    */
   switch (state) {
-    case 0:  // Get "this" [set rARG1]
-      rlArg = info->args[0];
+    case 0: {  // Get "this" [set rARG1]
+      RegLocation  rlArg = info->args[0];
       loadValueDirectFixed(cUnit, rlArg, rARG1);
       break;
+    }
     case 1: // Is "this" null? [use rARG1]
       genNullCheck(cUnit, info->args[0].sRegLow, rARG1, info->optFlags);
       // get this->klass_ [use rARG1, set rINVOKE_TGT]
@@ -283,6 +283,76 @@
   return state + 1;
 }
 
+/*
+ * All invoke-interface calls bounce off of art_invoke_interface_trampoline,
+ * which will locate the target and continue on via a tail call.
+ */
+int nextInterfaceCallInsn(CompilationUnit* cUnit, CallInfo* info, int state,
+                          uint32_t dexIdx, uint32_t unused, uintptr_t unused2,
+                          uintptr_t directMethod, InvokeType unused4)
+{
+#if !defined(TARGET_ARM)
+  directMethod = 0;
+#endif
+#if !defined(TARGET_X86)
+  int trampoline = ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline);
+#endif
+
+  if (directMethod != 0) {
+    switch (state) {
+      case 0:  // Load the trampoline target [sets rINVOKE_TGT].
+#if !defined(TARGET_X86)
+        loadWordDisp(cUnit, rSELF, trampoline, rINVOKE_TGT);
+#endif
+        // Get the interface Method* [sets rARG0]
+        if (directMethod != (uintptr_t)-1) {
+          loadConstant(cUnit, rARG0, directMethod);
+        } else {
+          LIR* dataTarget = scanLiteralPool(cUnit->methodLiteralList, dexIdx, 0);
+          if (dataTarget == NULL) {
+            dataTarget = addWordData(cUnit, &cUnit->methodLiteralList, dexIdx);
+            dataTarget->operands[1] = kInterface;
+          }
+#if defined(TARGET_ARM)
+          LIR* loadPcRel = rawLIR(cUnit, cUnit->currentDalvikOffset,
+                                  kThumb2LdrPcRel12, rARG0, 0, 0, 0, 0,
+                                  dataTarget);
+          oatAppendLIR(cUnit, loadPcRel);
+#else
+          UNIMPLEMENTED(FATAL) << (void*)dataTarget;
+#endif
+        }
+        break;
+      default:
+        return -1;
+    }
+  } else {
+    switch (state) {
+      case 0:
+        // Get the current Method* [sets rARG0] - TUNING: remove copy of method if it is promoted.
+        loadCurrMethodDirect(cUnit, rARG0);
+        // Load the trampoline target [sets rINVOKE_TGT].
+#if !defined(TARGET_X86)
+        loadWordDisp(cUnit, rSELF, trampoline, rINVOKE_TGT);
+#endif
+        break;
+    case 1:  // Get method->dex_cache_resolved_methods_ [set/use rARG0]
+      loadWordDisp(cUnit, rARG0,
+                   AbstractMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+                   rARG0);
+      break;
+    case 2:  // Grab target method* [set/use rARG0]
+      loadWordDisp(cUnit, rARG0,
+                   Array::DataOffset(sizeof(Object*)).Int32Value() + dexIdx * 4,
+                   rARG0);
+      break;
+    default:
+      return -1;
+    }
+  }
+  return state + 1;
+}
+
 int nextInvokeInsnSP(CompilationUnit* cUnit, CallInfo* info, int trampoline,
                      int state, uint32_t dexIdx, uint32_t methodIdx)
 {
@@ -335,18 +405,6 @@
   return nextInvokeInsnSP(cUnit, info, trampoline, state, dexIdx, 0);
 }
 
-/*
- * All invoke-interface calls bounce off of art_invoke_interface_trampoline,
- * which will locate the target and continue on via a tail call.
- */
-int nextInterfaceCallInsn(CompilationUnit* cUnit, CallInfo* info, int state,
-                          uint32_t dexIdx, uint32_t unused, uintptr_t unused2,
-                          uintptr_t unused3, InvokeType unused4)
-{
-  int trampoline = ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline);
-  return nextInvokeInsnSP(cUnit, info, trampoline, state, dexIdx, 0);
-}
-
 int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit,
                                          CallInfo* info, int state,
                                          uint32_t dexIdx, uint32_t unused,
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 3a80d10..3321c33 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -83,8 +83,11 @@
                                        directMethod)
     && !SLOW_INVOKE_PATH;
   if (info->type == kInterface) {
+    if (fastPath) {
+      pNullCk = &nullCk;
+    }
     nextCallInsn = fastPath ? nextInterfaceCallInsn
-        : nextInterfaceCallInsnWithAccessCheck;
+                            : nextInterfaceCallInsnWithAccessCheck;
     skipThis = false;
   } else if (info->type == kDirect) {
     if (fastPath) {
diff --git a/src/disassembler_arm.cc b/src/disassembler_arm.cc
index 509755c..dfaacf2 100644
--- a/src/disassembler_arm.cc
+++ b/src/disassembler_arm.cc
@@ -1006,7 +1006,7 @@
         ThumbRegister Rt(instr, 8);
         uint16_t imm8 = instr & 0xFF;
         opcode << (opB == 0 ? "str" : "ldr");
-        args << Rt << ", [ sp, #" << (imm8 << 2) << "]";
+        args << Rt << ", [sp, #" << (imm8 << 2) << "]";
       } else {
         uint16_t imm5 = (instr >> 6) & 0x1F;
         uint16_t opB = (instr >> 11) & 1;
diff --git a/src/oat/runtime/support_invoke.cc b/src/oat/runtime/support_invoke.cc
index 87497cc..4656198 100644
--- a/src/oat/runtime/support_invoke.cc
+++ b/src/oat/runtime/support_invoke.cc
@@ -19,6 +19,94 @@
 
 namespace art {
 
+// Determine target of interface dispatch. This object is known non-null.
+extern "C" uint64_t artInvokeInterfaceTrampoline(AbstractMethod* interface_method,
+                                                 Object* this_object, AbstractMethod* caller_method,
+                                                 Thread* self, AbstractMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  AbstractMethod* method;
+  if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex16)) {
+    method = this_object->GetClass()->FindVirtualMethodForInterface(interface_method);
+  } else {
+    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
+    DCHECK(interface_method == Runtime::Current()->GetResolutionMethod());
+    // Determine method index from calling dex instruction.
+#if defined(__arm__)
+    // On entry the stack pointed by sp is:
+    // | argN       |  |
+    // | ...        |  |
+    // | arg4       |  |
+    // | arg3 spill |  |  Caller's frame
+    // | arg2 spill |  |
+    // | arg1 spill |  |
+    // | Method*    | ---
+    // | LR         |
+    // | ...        |    callee saves
+    // | R3         |    arg3
+    // | R2         |    arg2
+    // | R1         |    arg1
+    // | R0         |
+    // | Method*    |  <- sp
+    DCHECK_EQ(48U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
+    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + kPointerSize);
+    uintptr_t caller_pc = regs[10];
+#elif defined(__i386__)
+    // On entry the stack pointed by sp is:
+    // | argN        |  |
+    // | ...         |  |
+    // | arg4        |  |
+    // | arg3 spill  |  |  Caller's frame
+    // | arg2 spill  |  |
+    // | arg1 spill  |  |
+    // | Method*     | ---
+    // | Return      |
+    // | EBP,ESI,EDI |    callee saves
+    // | EBX         |    arg3
+    // | EDX         |    arg2
+    // | ECX         |    arg1
+    // | EAX/Method* |  <- sp
+    DCHECK_EQ(32U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes());
+    uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp));
+    uintptr_t caller_pc = regs[7];
+#else
+    UNIMPLEMENTED(FATAL);
+    uintptr_t caller_pc = 0;
+#endif
+    uint32_t dex_pc = caller_method->ToDexPc(caller_pc);
+    const DexFile::CodeItem* code = MethodHelper(caller_method).GetCodeItem();
+    CHECK_LT(dex_pc, code->insns_size_in_code_units_);
+    const Instruction* instr = Instruction::At(&code->insns_[dex_pc]);
+    Instruction::Code instr_code = instr->Opcode();
+    CHECK(instr_code == Instruction::INVOKE_INTERFACE ||
+          instr_code == Instruction::INVOKE_INTERFACE_RANGE)
+        << "Unexpected call into interface trampoline: " << instr->DumpString(NULL);
+    DecodedInstruction dec_insn(instr);
+    uint32_t dex_method_idx = dec_insn.vB;
+    method = FindMethodFromCode(dex_method_idx, this_object, caller_method, self,
+                                                false, kInterface);
+    if (UNLIKELY(method == NULL)) {
+      CHECK(self->IsExceptionPending());
+      return 0;  // failure
+    }
+  }
+  const void* code = method->GetCode();
+
+#ifndef NDEBUG
+  // When we return, the caller will branch to this address, so it had better not be 0!
+  if (UNLIKELY(code == NULL)) {
+      MethodHelper mh(method);
+      LOG(FATAL) << "Code was NULL in method: " << PrettyMethod(method)
+                 << " location: " << mh.GetDexFile().GetLocation();
+  }
+#endif
+
+  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
+  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
+  uint64_t result = ((code_uint << 32) | method_uint);
+  return result;
+}
+
+
 static uint64_t artInvokeCommon(uint32_t method_idx, Object* this_object, AbstractMethod* caller_method,
                                 Thread* self, AbstractMethod** sp, bool access_check, InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -38,13 +126,14 @@
   DCHECK(!self->IsExceptionPending());
   const void* code = method->GetCode();
 
+#ifndef NDEBUG
   // When we return, the caller will branch to this address, so it had better not be 0!
   if (UNLIKELY(code == NULL)) {
       MethodHelper mh(method);
       LOG(FATAL) << "Code was NULL in method: " << PrettyMethod(method)
                  << " location: " << mh.GetDexFile().GetLocation();
   }
-
+#endif
 
   uint32_t method_uint = reinterpret_cast<uint32_t>(method);
   uint64_t code_uint = reinterpret_cast<uint32_t>(code);
@@ -53,13 +142,6 @@
 }
 
 // See comments in runtime_support_asm.S
-extern "C" uint64_t artInvokeInterfaceTrampoline(uint32_t method_idx, Object* this_object,
-                                                 AbstractMethod* caller_method, Thread* self,
-                                                 AbstractMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return artInvokeCommon(method_idx, this_object, caller_method, self, sp, false, kInterface);
-}
-
 extern "C" uint64_t artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
                                                                 Object* this_object,
                                                                 AbstractMethod* caller_method,