Make exception handling code in switch interpreter NO_INLINE.

Exceptions should be rare, so the handling code can be shared out of line
instead of being inlined at every call site. The compiler needs a bit of
help to keep the resulting NO_INLINE call efficient.

This saves 81k (arm32) and 150k (arm64) in libart.so.

The performance difference is within noise (golem run using the switch interpreter).

Test: "./art/test.py -b -r --interpreter --host" with switch interpreter.
Change-Id: I08a5690ceaf6b3cae8c2a486ab809e5604b88e0a
diff --git a/runtime/interpreter/interpreter_switch_impl-inl.h b/runtime/interpreter/interpreter_switch_impl-inl.h
index bf12a72..94cb3de 100644
--- a/runtime/interpreter/interpreter_switch_impl-inl.h
+++ b/runtime/interpreter/interpreter_switch_impl-inl.h
@@ -71,7 +71,7 @@
     return true;
   }
 
-  ALWAYS_INLINE WARN_UNUSED bool HandlePendingExceptionWithInstrumentation(
+  NO_INLINE WARN_UNUSED bool HandlePendingExceptionWithInstrumentationImpl(
       const instrumentation::Instrumentation* instr)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(self->IsExceptionPending());
@@ -99,6 +99,25 @@
     return false;  // Stop executing this opcode and continue in the exception handler.
   }
 
+  // Inlined wrapper: forwards to the NO_INLINE HandlePendingExceptionWithInstrumentationImpl.
+  ALWAYS_INLINE WARN_UNUSED bool HandlePendingExceptionWithInstrumentation(
+      const instrumentation::Instrumentation* instr)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    // We need to help the compiler a bit to make the NO_INLINE call efficient.
+    //  * All handler fields should be in registers, so we do not want to take the address
+    //    of this object (for the 'this' argument). Copy the handler just for the slow path.
+    //  * The modifiable fields should also be in registers, so we do not want to store their
+    //    address even in the handler copy. Copy them just for the call as well.
+    const Instruction* inst_copy = inst;
+    bool exit_loop_copy = exit_interpreter_loop;
+    InstructionHandler<do_access_check, transaction_active> handler_copy(
+        ctx, instrumentation, self, shadow_frame, dex_pc, inst_copy, inst_data, exit_loop_copy);
+    bool result = handler_copy.HandlePendingExceptionWithInstrumentationImpl(instr);
+    inst = inst_copy;  // Write the local copies back into this handler's own fields.
+    exit_interpreter_loop = exit_loop_copy;
+    return result;  // Result of the Impl call, passed through unchanged.
+  }
+
   ALWAYS_INLINE WARN_UNUSED bool HandlePendingException()
       REQUIRES_SHARED(Locks::mutator_lock_) {
     return HandlePendingExceptionWithInstrumentation(instrumentation);