Improve invokeinterface for nterp.

- Remove the bitwise negation of imt_index: the class linker always
  initializes imt_index, so a zero index no longer needs to be
  distinguished from a missing cached entry.
- Special-case the IMT index of default methods: they cannot store an
  imt_index, so derive it by masking method_index with
  ImTable::kSizeTruncToPowerOfTwo - 1, which keeps invocation in nterp simple.
- Add arm64, arm, and x86_64 support in nterp.

Test: test.py
Bug: 112676029
Change-Id: I815a4a4ec5c219921ab4ed1a20b02586aab19a46
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 2914382..b2a27f3 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -423,10 +423,8 @@
 }
 
 inline uint32_t ArtMethod::GetImtIndex() {
-  if (LIKELY(IsAbstract() && imt_index_ != 0)) {
-    uint16_t imt_index = ~imt_index_;
-    DCHECK_EQ(imt_index, ImTable::GetImtIndex(this)) << PrettyMethod();
-    return imt_index;
+  if (LIKELY(IsAbstract())) {
+    return imt_index_;
   } else {
     return ImTable::GetImtIndex(this);
   }
@@ -434,7 +432,7 @@
 
 inline void ArtMethod::CalculateAndSetImtIndex() {
   DCHECK(IsAbstract()) << PrettyMethod();
-  imt_index_ = ~ImTable::GetImtIndex(this);
+  imt_index_ = ImTable::GetImtIndex(this);
 }
 
 }  // namespace art
diff --git a/runtime/art_method.h b/runtime/art_method.h
index b21a18d..31b81d4 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -792,8 +792,7 @@
     // Non-abstract methods: The hotness we measure for this method. Not atomic,
     // as we allow missing increments: if the method is hot, we will see it eventually.
     uint16_t hotness_count_;
-    // Abstract methods: IMT index (bitwise negated) or zero if it was not cached.
-    // The negation is needed to distinguish zero index and missing cached entry.
+    // Abstract methods: IMT index.
     uint16_t imt_index_;
   };
 
diff --git a/runtime/image.cc b/runtime/image.cc
index 6f88481..9417448 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,7 +29,8 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '8', '8', '\0' };  // Remove DexCache arrays.
+// Last change: IMT index for default methods.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '8', '9', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_reservation_size,
                          uint32_t component_count,
diff --git a/runtime/imtable-inl.h b/runtime/imtable-inl.h
index 21e3eb1..71ece99 100644
--- a/runtime/imtable-inl.h
+++ b/runtime/imtable-inl.h
@@ -83,6 +83,12 @@
 }
 
 inline uint32_t ImTable::GetImtIndex(ArtMethod* method) {
+  if (!method->IsAbstract()) {
+    // Default methods cannot store the imt_index, so we derive the IMT index
+    // from the method_index instead. Masking with kSizeTruncToPowerOfTwo - 1
+    // lets the interpreter compute it with a single AND.
+    return method->GetMethodIndex() & (ImTable::kSizeTruncToPowerOfTwo - 1);
+  }
   uint32_t class_hash, name_hash, signature_hash;
   GetImtHashComponents(method, &class_hash, &name_hash, &signature_hash);
 
diff --git a/runtime/imtable.h b/runtime/imtable.h
index 5db3d07..df10cda 100644
--- a/runtime/imtable.h
+++ b/runtime/imtable.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_IMTABLE_H_
 #define ART_RUNTIME_IMTABLE_H_
 
+#include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/enums.h"
 #include "base/locks.h"
@@ -34,6 +35,10 @@
   // (non-marker) interfaces.
   // When this value changes, old images become incompatible, so image file version must change too.
   static constexpr size_t kSize = 43;
+  // Default methods cannot store the imt_index, so their IMT index is instead derived from the
+  // method_index, masked with kSizeTruncToPowerOfTwo - 1 (kSize truncated to a power of two,
+  // minus one). This simplifies fetching it in the interpreter.
+  static constexpr size_t kSizeTruncToPowerOfTwo = TruncToPowerOfTwo(kSize);
 
   uint8_t* AddressOfElement(size_t index, PointerSize pointer_size) {
     return reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
diff --git a/runtime/interpreter/mterp/arm64ng/invoke.S b/runtime/interpreter/mterp/arm64ng/invoke.S
index 4a7ec49..ac45a35 100644
--- a/runtime/interpreter/mterp/arm64ng/invoke.S
+++ b/runtime/interpreter/mterp/arm64ng/invoke.S
@@ -72,7 +72,7 @@
 %def invoke_interface(range=""):
    EXPORT_PC
    // Fast-path which gets the method from thread-local cache.
-   FETCH_FROM_THREAD_CACHE x0, 2f
+   FETCH_FROM_THREAD_CACHE x26, 5f
 1:
    // First argument is the 'this' pointer.
    FETCH w1, 2
@@ -82,26 +82,42 @@
    GET_VREG w1, w1
    // Note: if w1 is null, this will be handled by our SIGSEGV handler.
    ldr w2, [x1, #MIRROR_OBJECT_CLASS_OFFSET]
+   // Test the two low bits of the value fetched from the cache:
+   // - If bit 0 is set, this is a method on j.l.Object (do a vtable call).
+   // - If bit 1 is set, this is a default method.
+   tst w26, #0x3
+   b.ne 3f
+   ldrh w3, [x26, #ART_METHOD_IMT_INDEX_OFFSET]
+2:
    ldr x2, [x2, #MIRROR_CLASS_IMT_PTR_OFFSET_64]
-   ldr x0, [x2, w0, uxtw #3]
+   ldr x0, [x2, w3, uxtw #3]
    .if $range
    b NterpCommonInvokeInterfaceRange
    .else
    b NterpCommonInvokeInterface
    .endif
-2:
+3:
+   tbnz w26, #0, 4f
+   and x26, x26, #-4
+   ldrh w3, [x26, #ART_METHOD_METHOD_INDEX_OFFSET]
+   and w3, w3, #ART_METHOD_IMT_MASK
+   b 2b
+4:
+   lsr w26, w26, #16
+   add w2, w2, #MIRROR_CLASS_VTABLE_OFFSET_64
+   ldr x0, [x2, w26, uxtw #3]
+   .if $range
+   b NterpCommonInvokeInstanceRange
+   .else
+   b NterpCommonInvokeInstance
+   .endif
+5:
    mov x0, xSELF
    ldr x1, [sp]
    mov x2, xPC
    bl nterp_get_method
-   // For j.l.Object interface calls, the high bit is set. Also the method index is 16bits.
-   tbz w0, #31, 1b
-   and w0, w0, #0xffff
-   .if $range
-   b NterpHandleInvokeInterfaceOnObjectMethodRange
-   .else
-   b NterpHandleInvokeInterfaceOnObjectMethod
-   .endif
+   mov x26, x0
+   b 1b
 
 %def op_invoke_interface():
 %  invoke_interface(range="0")
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S
index 8223a1b..b6d9db6 100644
--- a/runtime/interpreter/mterp/arm64ng/main.S
+++ b/runtime/interpreter/mterp/arm64ng/main.S
@@ -862,6 +862,7 @@
    // - xINST contains shorty (in callee-save to switch over return value after call).
    // - x0 contains method
    // - x1 contains 'this' pointer for instance method.
+   // - for interface calls, x26 contains the interface method.
    add x9, xINST, #1  // shorty + 1  ; ie skip return arg character
    FETCH w11, 2 // arguments
    .if \is_string_init
@@ -903,10 +904,8 @@
    bl art_quick_invoke_custom
    .else
       .if \is_interface
-      // Setup hidden argument. As we don't have access to the interface method,
-      // just pass the method from the IMT. If the method is the conflict trampoline,
-      // this will make the stub go to runtime, otherwise the hidden argument is unused.
-      mov ip2, x0
+      // Setup hidden argument.
+      mov ip2, x26
       .endif
       ldr lr, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
       blr lr
@@ -1102,6 +1101,7 @@
    // - xINST contains shorty (in callee-save to switch over return value after call).
    // - x0 contains method
    // - x1 contains 'this' pointer for instance method.
+   // - for interface calls, x26 contains the interface method.
    add x9, xINST, #1  // shorty + 1  ; ie skip return arg character
    FETCH w10, 2 // arguments
    .if \is_string_init
@@ -1154,10 +1154,8 @@
    bl art_quick_invoke_custom
    .else
       .if \is_interface
-      // Setup hidden argument. As we don't have access to the interface method,
-      // just pass the method from the IMT. If the method is the conflict trampoline,
-      // this will make the stub go to runtime, otherwise the hidden argument is unused.
-      mov ip2, x0
+      // Setup hidden argument.
+      mov ip2, x26
       .endif
       ldr lr, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
       blr lr
@@ -1582,27 +1580,6 @@
     // Jump to the compiled code.
     br xFP
 
-NterpHandleInvokeInterfaceOnObjectMethodRange:
-   // First argument is the 'this' pointer.
-   FETCH w1, 2
-   GET_VREG w1, w1
-   // Note: x1 is null, this will be handled by our SIGSEGV handler.
-   ldr w2, [x1, #MIRROR_OBJECT_CLASS_OFFSET]
-   add w2, w2, #MIRROR_CLASS_VTABLE_OFFSET_64
-   ldr x0, [x2, w0, sxtw #3]
-   b NterpCommonInvokeInstanceRange
-
-NterpHandleInvokeInterfaceOnObjectMethod:
-   // First argument is the 'this' pointer.
-   FETCH w1, 2
-   and w1, w1, #0xf
-   GET_VREG w1, w1
-   // Note: x1 is null, this will be handled by our SIGSEGV handler.
-   ldr w2, [x1, #MIRROR_OBJECT_CLASS_OFFSET]
-   add w2, w2, #MIRROR_CLASS_VTABLE_OFFSET_64
-   ldr x0, [x2, w0, sxtw #3]
-   b NterpCommonInvokeInstance
-
 // This is the logical end of ExecuteNterpImpl, where the frame info applies.
 // EndExecuteNterpImpl includes the methods below as we want the runtime to
 // see them as part of the Nterp PCs.
diff --git a/runtime/interpreter/mterp/armng/invoke.S b/runtime/interpreter/mterp/armng/invoke.S
index 47678dc..151890f 100644
--- a/runtime/interpreter/mterp/armng/invoke.S
+++ b/runtime/interpreter/mterp/armng/invoke.S
@@ -74,10 +74,11 @@
    b NterpCommonInvokePolymorphicRange
 
 %def invoke_interface(range=""):
+%  slow_path = add_helper(lambda: op_invoke_interface_slow_path())
    EXPORT_PC
    // Fast-path which gets the method from thread-local cache.
-   FETCH_FROM_THREAD_CACHE r0, 2f
-1:
+   FETCH_FROM_THREAD_CACHE r4, ${slow_path}
+.L${opcode}_resume:
    // First argument is the 'this' pointer.
    FETCH r1, 2
    .if !$range
@@ -86,27 +87,44 @@
    GET_VREG r1, r1
    // Note: if r1 is null, this will be handled by our SIGSEGV handler.
    ldr r2, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
+   // Test the two low bits of the value fetched from the cache:
+   // - If bit 0 is set, this is a method on j.l.Object (do a vtable call).
+   // - If bit 1 is set, this is a default method.
+   tst r4, #3
+   bne 2f
+   ldrh r3, [r4, #ART_METHOD_IMT_INDEX_OFFSET]
+1:
    ldr r2, [r2, #MIRROR_CLASS_IMT_PTR_OFFSET_32]
-   ldr r0, [r2, r0, uxtw #2]
+   ldr r0, [r2, r3, uxtw #2]
    .if $range
    b NterpCommonInvokeInterfaceRange
    .else
    b NterpCommonInvokeInterface
    .endif
 2:
+   tst r4, #1
+   bne 3f
+   and r4, r4, #-4
+   ldrh r3, [r4, #ART_METHOD_METHOD_INDEX_OFFSET]
+   and r3, r3, #ART_METHOD_IMT_MASK
+   b 1b
+3:
+   lsr r4, r4, #16
+   add r2, r2, #MIRROR_CLASS_VTABLE_OFFSET_32
+   ldr r0, [r2, r4, lsl #2]
+   .if $range
+   b NterpCommonInvokeInstanceRange
+   .else
+   b NterpCommonInvokeInstance
+   .endif
+
+%def op_invoke_interface_slow_path():
    mov r0, rSELF
    ldr r1, [sp]
    mov r2, rPC
    bl nterp_get_method
-   // For j.l.Object interface calls, the high bit is set. Also the method index is 16bits.
-   cmp r0, #0
-   bge 1b
-   ubfx r0, r0, #0, #16
-   .if $range
-   b NterpHandleInvokeInterfaceOnObjectMethodRange
-   .else
-   b NterpHandleInvokeInterfaceOnObjectMethod
-   .endif
+   mov r4, r0
+   b .L${opcode}_resume
 
 %def op_invoke_interface():
 %  invoke_interface(range="0")
diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S
index 680c8ad..7095f58 100644
--- a/runtime/interpreter/mterp/armng/main.S
+++ b/runtime/interpreter/mterp/armng/main.S
@@ -961,6 +961,10 @@
    .endif
 
 .Lcall_compiled_code_\suffix:
+   .if \is_interface
+   // Save hidden argument.
+   vmov s16, r4
+   .endif
    GET_SHORTY rINST, \is_interface, \is_polymorphic, \is_custom
    // From this point:
    // - rINST contains shorty (in callee-save to switch over return value after call).
@@ -1022,10 +1026,8 @@
    bl art_quick_invoke_custom
    .else
       .if \is_interface
-      // Setup hidden argument. As we don't have access to the interface method,
-      // just pass the method from the IMT. If the method is the conflict trampoline,
-      // this will make the stub go to runtime, otherwise the hidden argument is unused.
-      mov ip, r0
+      // Setup hidden argument.
+      vmov ip, s16
       .endif
       ldr lr, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
       blx lr
@@ -1160,6 +1162,10 @@
    .endif
 
 .Lcall_compiled_code_range_\suffix:
+   .if \is_interface
+   // Save hidden argument.
+   vmov s16, r4
+   .endif
    GET_SHORTY rINST, \is_interface, \is_polymorphic, \is_custom
    // From this point:
    // - rINST contains shorty (in callee-save to switch over return value after call).
@@ -1217,10 +1223,8 @@
    bl art_quick_invoke_custom
    .else
       .if \is_interface
-      // Setup hidden argument. As we don't have access to the interface method,
-      // just pass the method from the IMT. If the method is the conflict trampoline,
-      // this will make the stub go to runtime, otherwise the hidden argument is unused.
-      mov ip, r0
+      // Setup hidden argument.
+      vmov ip, s16
       .endif
       ldr lr, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
       blx lr
@@ -1551,26 +1555,6 @@
    bl art_quick_read_barrier_mark_reg00
    b 1b
 
-NterpHandleInvokeInterfaceOnObjectMethodRange:
-   // First argument is the 'this' pointer.
-   FETCH r1, 2
-   GET_VREG r1, r1
-   // Note: if r1 is null, this will be handled by our SIGSEGV handler.
-   ldr r2, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
-   add r2, r2, #MIRROR_CLASS_VTABLE_OFFSET_32
-   ldr r0, [r2, r0, lsl #2]
-   b NterpCommonInvokeInstanceRange
-
-NterpHandleInvokeInterfaceOnObjectMethod:
-   // First argument is the 'this' pointer.
-   FETCH r1, 2
-   and r1, r1, #0xf
-   GET_VREG r1, r1
-   // Note: if r1 is null, this will be handled by our SIGSEGV handler.
-   ldr r2, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
-   add r2, r2, #MIRROR_CLASS_VTABLE_OFFSET_32
-   ldr r0, [r2, r0, lsl #2]
-   b NterpCommonInvokeInstance
 
 NterpHandleHotnessOverflow:
     add r1, rPC, rINST, lsl #1
diff --git a/runtime/interpreter/mterp/nterp.cc b/runtime/interpreter/mterp/nterp.cc
index 3d92473..1da8be6 100644
--- a/runtime/interpreter/mterp/nterp.cc
+++ b/runtime/interpreter/mterp/nterp.cc
@@ -384,18 +384,23 @@
   }
 
   if (invoke_type == kInterface) {
+    size_t result = 0u;
     if (resolved_method->GetDeclaringClass()->IsObjectClass()) {
-      // Don't update the cache and return a value with high bit set to notify the
-      // interpreter it should do a vtable call instead.
+      // Set the low bit to notify the interpreter it should do a vtable call.
       DCHECK_LT(resolved_method->GetMethodIndex(), 0x10000);
-      return resolved_method->GetMethodIndex() | (1U << 31);
+      result = (resolved_method->GetMethodIndex() << 16) | 1U;
     } else {
       DCHECK(resolved_method->GetDeclaringClass()->IsInterface());
-      UpdateCache(self, dex_pc_ptr, resolved_method->GetImtIndex());
-      // TODO: We should pass the resolved method, and have nterp fetch the IMT
-      // index. Unfortunately, this doesn't work for default methods.
-      return resolved_method->GetImtIndex();
+      if (!resolved_method->IsAbstract()) {
+        // Set the second bit to notify the interpreter this is a default
+        // method.
+        result = reinterpret_cast<size_t>(resolved_method) | 2U;
+      } else {
+        result = reinterpret_cast<size_t>(resolved_method);
+      }
     }
+    UpdateCache(self, dex_pc_ptr, result);
+    return result;
   } else if (resolved_method->GetDeclaringClass()->IsStringClass()
              && !resolved_method->IsStatic()
              && resolved_method->IsConstructor()) {
diff --git a/runtime/interpreter/mterp/x86_64ng/invoke.S b/runtime/interpreter/mterp/x86_64ng/invoke.S
index b7885b4..ebe2fcf 100644
--- a/runtime/interpreter/mterp/x86_64ng/invoke.S
+++ b/runtime/interpreter/mterp/x86_64ng/invoke.S
@@ -72,10 +72,11 @@
    jmp NterpCommonInvokePolymorphicRange
 
 %def invoke_interface(helper="", range=""):
+%  slow_path = add_helper(lambda: op_invoke_interface_slow_path())
    EXPORT_PC
-   // Fast-path which gets the method from thread-local cache.
-   FETCH_FROM_THREAD_CACHE %rax, 2f
-1:
+   // Fast-path which gets the interface method from thread-local cache.
+   FETCH_FROM_THREAD_CACHE %rax, ${slow_path}
+.L${opcode}_resume:
    // First argument is the 'this' pointer.
    movzwl 4(rPC), %r11d // arguments
    .if !$range
@@ -83,23 +84,35 @@
    .endif
    movl (rFP, %r11, 4), %esi
    movl MIRROR_OBJECT_CLASS_OFFSET(%esi), %edx
+   // Test the two low bits of the value fetched from the cache:
+   // - If bit 0 is set, this is a method on j.l.Object (do a vtable call).
+   // - If bit 1 is set, this is a default method.
+   testl $$3, %eax
+   jne 2f
+   movzw ART_METHOD_IMT_INDEX_OFFSET(%rax), %ecx
+1:
    movq MIRROR_CLASS_IMT_PTR_OFFSET_64(%edx), %rdx
-   movq (%rdx, %rax, 8), %rdi
+   movq (%rdx, %rcx, 8), %rdi
    jmp $helper
 2:
+   testl $$1, %eax
+   .if $range
+   jne NterpHandleInvokeInterfaceOnObjectMethodRange
+   .else
+   jne NterpHandleInvokeInterfaceOnObjectMethod
+   .endif
+   // Default method
+   andq $$-4, %rax
+   movzw ART_METHOD_METHOD_INDEX_OFFSET(%rax), %ecx
+   andl $$ART_METHOD_IMT_MASK, %ecx
+   jmp 1b
+
+%def op_invoke_interface_slow_path():
    movq rSELF:THREAD_SELF_OFFSET, %rdi
    movq 0(%rsp), %rsi
    movq rPC, %rdx
    call nterp_get_method
-   testl %eax, %eax
-   jns 1b
-   // For j.l.Object interface calls, the high bit is set. Also the method index is 16bits.
-   andl LITERAL(0xffff), %eax
-   .if $range
-   jmp NterpHandleInvokeInterfaceOnObjectMethodRange
-   .else
-   jmp NterpHandleInvokeInterfaceOnObjectMethod
-   .endif
+   jmp .L${opcode}_resume
 
 %def op_invoke_interface():
 %  invoke_interface(helper="NterpCommonInvokeInterface", range="0")
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index 4626381..5611b04 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -464,7 +464,7 @@
 // Uses rax as temporary.
 .macro LOOP_OVER_SHORTY_LOADING_GPRS gpr_reg64, gpr_reg32, inst, shorty, arg_index, finished
 1: // LOOP
-    movb (REG_VAR(shorty)), %al   // bl := *shorty
+    movb (REG_VAR(shorty)), %al   // al := *shorty
     addq MACRO_LITERAL(1), REG_VAR(shorty)  // shorty++
     cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto finished
     je  VAR(finished)
@@ -531,7 +531,7 @@
 // Uses rax as temporary.
 .macro LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm_reg, shorty, arg_index, stack_index, finished
 1: // LOOP
-    movb (REG_VAR(shorty)), %al             // bl := *shorty
+    movb (REG_VAR(shorty)), %al             // al := *shorty
     addq MACRO_LITERAL(1), REG_VAR(shorty)  // shorty++
     cmpb MACRO_LITERAL(0), %al              // if (al == '\0') goto finished
     je VAR(finished)
@@ -605,7 +605,7 @@
 // Uses rax as temporary.
 .macro LOOP_RANGE_OVER_SHORTY_LOADING_GPRS gpr_reg64, gpr_reg32, shorty, arg_index, stack_index, finished
 1: // LOOP
-    movb (REG_VAR(shorty)), %al             // bl := *shorty
+    movb (REG_VAR(shorty)), %al             // al := *shorty
     addq MACRO_LITERAL(1), REG_VAR(shorty)  // shorty++
     cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto finished
     je  VAR(finished)
@@ -706,7 +706,7 @@
 // Uses rax as temporary.
 .macro LOOP_OVER_SHORTY_STORING_GPRS gpr_reg64, gpr_reg32, shorty, arg_index, regs, refs, finished
 1: // LOOP
-    movb (REG_VAR(shorty)), %al             // bl := *shorty
+    movb (REG_VAR(shorty)), %al             // al := *shorty
     addq MACRO_LITERAL(1), REG_VAR(shorty)  // shorty++
     cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto finished
     je  VAR(finished)
@@ -746,7 +746,7 @@
 // (The trade-off is different for passing arguments and receiving them.)
 .macro LOOP_OVER_FPs shorty, arg_index, regs, stack_ptr, finished
 1: // LOOP
-    movb (REG_VAR(shorty)), %al             // bl := *shorty
+    movb (REG_VAR(shorty)), %al             // al := *shorty
     addq MACRO_LITERAL(1), REG_VAR(shorty)  // shorty++
     cmpb MACRO_LITERAL(0), %al              // if (al == '\0') goto finished
     je VAR(finished)
@@ -778,7 +778,7 @@
 // Uses rax as temporary.
 .macro LOOP_OVER_INTs shorty, arg_index, regs, refs, stack_ptr, finished
 1: // LOOP
-    movb (REG_VAR(shorty)), %al             // bl := *shorty
+    movb (REG_VAR(shorty)), %al             // al := *shorty
     addq MACRO_LITERAL(1), REG_VAR(shorty)  // shorty++
     cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto finished
     je  VAR(finished)
@@ -1041,10 +1041,11 @@
    movq %rax, \dest
 .endm
 
+// Uses r9 as temporary.
 .macro DO_ENTRY_POINT_CHECK call_compiled_code
    // On entry, the method is %rdi, the instance is %rsi
-   leaq ExecuteNterpImpl(%rip), %rax
-   cmpq %rax, ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
+   leaq ExecuteNterpImpl(%rip), %r9
+   cmpq %r9, ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
    jne  VAR(call_compiled_code)
 
    movq ART_METHOD_DATA_OFFSET_64(%rdi), %rax
@@ -1084,6 +1085,10 @@
    .endif
 
 .Lcall_compiled_code_\suffix:
+   .if \is_interface
+   // Save interface method, used for conflict resolution, in a callee-save register.
+   movq %rax, %xmm12
+   .endif
    GET_SHORTY rINSTq, \is_interface, \is_polymorphic, \is_custom
    // From this point:
    // - rISNTq contains shorty (in callee-save to switch over return value after call).
@@ -1130,10 +1135,7 @@
    call SYMBOL(art_quick_invoke_custom)
    .else
       .if \is_interface
-      // Setup hidden argument. As we don't have access to the interface method,
-      // just pass the method from the IMT. If the method is the conflict trampoline,
-      // this will make the stub go to runtime, otherwise the hidden argument is unused.
-      movq %rdi, %rax
+      movq %xmm12, %rax
       .endif
       call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    .endif
@@ -1180,6 +1182,10 @@
    .endif
 
 .Lcall_compiled_code_range_\suffix:
+   .if \is_interface
+   // Save interface method, used for conflict resolution, in a callee-save register.
+   movq %rax, %xmm12
+   .endif
    GET_SHORTY rINSTq, \is_interface, \is_polymorphic, \is_custom
    // From this point:
    // - rINSTq contains shorty (in callee-save to switch over return value after call).
@@ -1232,10 +1238,8 @@
    call SYMBOL(art_quick_invoke_custom)
    .else
      .if \is_interface
-     // Setup hidden argument. As we don't have access to the interface method,
-     // just pass the method from the IMT. If the method is the conflict trampoline,
-     // this will make the stub go to runtime, otherwise the hidden argument is unused.
-     movq %rdi, %rax
+     // Set the hidden argument for conflict resolution.
+     movq %xmm12, %rax
      .endif
      call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    .endif
@@ -1983,21 +1987,12 @@
     jmp *%rbx
 
 NterpHandleInvokeInterfaceOnObjectMethodRange:
-   // First argument is the 'this' pointer.
-   movzwl 4(rPC), %r11d // arguments
-   movl (rFP, %r11, 4), %esi
-   // Note: if esi is null, this will be handled by our SIGSEGV handler.
-   movl MIRROR_OBJECT_CLASS_OFFSET(%esi), %edx
+   shrl $$16, %eax
    movq MIRROR_CLASS_VTABLE_OFFSET_64(%edx, %eax, 8), %rdi
    jmp NterpCommonInvokeInstanceRange
 
 NterpHandleInvokeInterfaceOnObjectMethod:
-   // First argument is the 'this' pointer.
-   movzwl 4(rPC), %r11d // arguments
-   andq MACRO_LITERAL(0xf), %r11
-   movl (rFP, %r11, 4), %esi
-   // Note: if esi is null, this will be handled by our SIGSEGV handler.
-   movl MIRROR_OBJECT_CLASS_OFFSET(%esi), %edx
+   shrl $$16, %eax
    movq MIRROR_CLASS_VTABLE_OFFSET_64(%edx, %eax, 8), %rdi
    jmp NterpCommonInvokeInstance
 
diff --git a/tools/cpp-define-generator/art_method.def b/tools/cpp-define-generator/art_method.def
index 097d466..7b5606f 100644
--- a/tools/cpp-define-generator/art_method.def
+++ b/tools/cpp-define-generator/art_method.def
@@ -16,12 +16,15 @@
 
 #if ASM_DEFINE_INCLUDE_DEPENDENCIES
 #include "art_method.h"
+#include "imtable.h"
 #endif
 
 ASM_DEFINE(ART_METHOD_ACCESS_FLAGS_OFFSET,
            art::ArtMethod::AccessFlagsOffset().Int32Value())
 ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG,
            art::kAccStatic)
+ASM_DEFINE(ART_METHOD_IMT_MASK,
+           art::ImTable::kSizeTruncToPowerOfTwo - 1)
 ASM_DEFINE(ART_METHOD_DECLARING_CLASS_OFFSET,
            art::ArtMethod::DeclaringClassOffset().Int32Value())
 ASM_DEFINE(ART_METHOD_JNI_OFFSET_32,