Add invoke fast-path support for quickened invokes.

Quickened invokes had their own handler implementation.
Merge it into the generic DoInvoke, which already has a fast path.

This speeds up arm64 golem interpreter benchmarks by 6%.

Test: test.py -b -r --interpreter --host --64
Change-Id: Icac9e073f61df67780242877179111ed7bee7154
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 9f4403e..e515d9d 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -156,7 +156,7 @@
 
 // Handles all invoke-XXX/range instructions except for invoke-polymorphic[/range].
 // Returns true on success, otherwise throws an exception and returns false.
-template<InvokeType type, bool is_range, bool do_access_check, bool is_mterp>
+template<InvokeType type, bool is_range, bool do_access_check, bool is_mterp, bool is_quick = false>
 static ALWAYS_INLINE bool DoInvoke(Thread* self,
                                    ShadowFrame& shadow_frame,
                                    const Instruction* inst,
@@ -177,7 +177,9 @@
   InterpreterCache* tls_cache = self->GetInterpreterCache();
   size_t tls_value;
   ArtMethod* resolved_method;
-  if (LIKELY(tls_cache->Get(inst, &tls_value))) {
+  if (is_quick) {
+    resolved_method = nullptr;  // We don't know/care what the original method was.
+  } else if (LIKELY(tls_cache->Get(inst, &tls_value))) {
     resolved_method = reinterpret_cast<ArtMethod*>(tls_value);
   } else {
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
@@ -196,8 +198,20 @@
   // Null pointer check and virtual method resolution.
   ObjPtr<mirror::Object> receiver =
       (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
-  ArtMethod* const called_method = FindMethodToCall<type, do_access_check>(
-      method_idx, resolved_method, &receiver, sf_method, self);
+  ArtMethod* called_method;
+  if (is_quick) {
+    if (UNLIKELY(receiver == nullptr)) {
+      // We lost the reference to the method index so we cannot get a more precise exception.
+      ThrowNullPointerExceptionFromDexPC();
+      return false;
+    }
+    DCHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
+    called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
+        /*vtable_idx=*/ method_idx, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  } else {
+    called_method = FindMethodToCall<type, do_access_check>(
+        method_idx, resolved_method, &receiver, sf_method, self);
+  }
   if (UNLIKELY(called_method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
@@ -353,45 +367,6 @@
   }
 }
 
-// Handles invoke-virtual-quick and invoke-virtual-quick-range instructions.
-// Returns true on success, otherwise throws an exception and returns false.
-template<bool is_range>
-static inline bool DoInvokeVirtualQuick(Thread* self, ShadowFrame& shadow_frame,
-                                        const Instruction* inst, uint16_t inst_data,
-                                        JValue* result)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-  ObjPtr<mirror::Object> const receiver = shadow_frame.GetVRegReference(vregC);
-  if (UNLIKELY(receiver == nullptr)) {
-    // We lost the reference to the method index so we cannot get a more
-    // precised exception message.
-    ThrowNullPointerExceptionFromDexPC();
-    return false;
-  }
-  const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
-  ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
-      vtable_idx, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
-  if (UNLIKELY(called_method == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    result->SetJ(0);
-    return false;
-  } else if (UNLIKELY(!called_method->IsInvokable())) {
-    called_method->ThrowInvocationTimeError();
-    result->SetJ(0);
-    return false;
-  } else {
-    jit::Jit* jit = Runtime::Current()->GetJit();
-    if (jit != nullptr) {
-      jit->InvokeVirtualOrInterface(
-          receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
-      jit->AddSamples(self, shadow_frame.GetMethod(), 1, /*with_backedges=*/false);
-    }
-    // No need to check since we've been quickened.
-    return DoCall<is_range, false>(called_method, self, shadow_frame, inst, inst_data, result);
-  }
-}
-
 // Handles iget-XXX and sget-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
diff --git a/runtime/interpreter/interpreter_switch_impl-inl.h b/runtime/interpreter/interpreter_switch_impl-inl.h
index c430de2..3c31c38 100644
--- a/runtime/interpreter/interpreter_switch_impl-inl.h
+++ b/runtime/interpreter/interpreter_switch_impl-inl.h
@@ -1749,15 +1749,15 @@
       }
       case Instruction::INVOKE_VIRTUAL_QUICK: {
         PREAMBLE();
-        bool success = DoInvokeVirtualQuick<false>(
-            self, shadow_frame, inst, inst_data, &result_register);
+        bool success = DoInvoke<kVirtual, false, do_access_check, /*is_mterp=*/ false,
+            /*is_quick=*/ true>(self, shadow_frame, inst, inst_data, &result_register);
         POSSIBLY_HANDLE_PENDING_EXCEPTION_ON_INVOKE(!success);
         break;
       }
       case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
         PREAMBLE();
-        bool success = DoInvokeVirtualQuick<true>(
-            self, shadow_frame, inst, inst_data, &result_register);
+        bool success = DoInvoke<kVirtual, true, do_access_check, /*is_mterp=*/ false,
+            /*is_quick=*/ true>(self, shadow_frame, inst, inst_data, &result_register);
         POSSIBLY_HANDLE_PENDING_EXCEPTION_ON_INVOKE(!success);
         break;
       }
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index ba109bc..c58b688 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -321,25 +321,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  const uint32_t vregC = inst->VRegC_35c();
-  const uint32_t vtable_idx = inst->VRegB_35c();
-  ObjPtr<mirror::Object> const receiver = shadow_frame->GetVRegReference(vregC);
-  if (receiver != nullptr) {
-    ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
-        vtable_idx, kRuntimePointerSize);
-    if ((called_method != nullptr) && called_method->IsIntrinsic()) {
-      if (MterpHandleIntrinsic(shadow_frame, called_method, inst, inst_data, result_register)) {
-        jit::Jit* jit = Runtime::Current()->GetJit();
-        if (jit != nullptr) {
-          jit->InvokeVirtualOrInterface(
-              receiver, shadow_frame->GetMethod(), shadow_frame->GetDexPC(), called_method);
-        }
-        return !self->IsExceptionPending();
-      }
-    }
-  }
-  return DoInvokeVirtualQuick<false>(
-      self, *shadow_frame, inst, inst_data, result_register);
+  return DoInvoke<kVirtual, /*is_range=*/ false, /*do_access_check=*/ false, /*is_mterp=*/ true,
+      /*is_quick=*/ true>(self, *shadow_frame, inst, inst_data, result_register);
 }
 
 extern "C" size_t MterpInvokeVirtualQuickRange(Thread* self,
@@ -349,8 +332,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  return DoInvokeVirtualQuick<true>(
-      self, *shadow_frame, inst, inst_data, result_register);
+  return DoInvoke<kVirtual, /*is_range=*/ true, /*do_access_check=*/ false, /*is_mterp=*/ true,
+      /*is_quick=*/ true>(self, *shadow_frame, inst, inst_data, result_register);
 }
 
 extern "C" void MterpThreadFenceForConstructor() {