JNI: Add "no inline" entrypoints for lock/unlock.

This re-enables lock logging for synchronized native methods
which was mistakenly lost in
    https://android-review.googlesource.com/1898923 .

Also update a comment about arm assembler workaround.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I76fb2f4c16d261b28dfbe1e505b7cb173cc06005
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index 26c1d31..c69e209 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -295,8 +295,23 @@
      *     All argument registers need to be preserved.
      */
 ENTRY art_jni_lock_object
+    // Note: the slow path is actually the art_jni_lock_object_no_inline (tail call).
     LOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Llock_object_jni_slow, /*can_be_null*/ 0
+END art_jni_lock_object
 
+    /*
+     * Entry from JNI stub that calls `artLockObjectFromCode()`
+     * (the same as for managed code), may block for GC.
+     * Custom calling convention:
+     *     r4 holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     All argument registers need to be preserved.
+     */
+    .extern artLockObjectFromCode
+ENTRY art_jni_lock_object_no_inline
+    // This is also the slow path for art_jni_lock_object.
+    // Note that we need a local label as the assembler emits bad instructions
+    // for CBZ/CBNZ if we try to jump to `art_jni_lock_object_no_inline`.
 .Llock_object_jni_slow:
     // Save managed args, r4 (for stack alignment) and LR.
     SAVE_MANAGED_ARGS_R4_LR_INCREASE_FRAME
@@ -318,7 +333,7 @@
     // Rely on the JNI transition frame constructed in the JNI stub.
     mov    r0, rSELF                           @ Pass Thread::Current().
     b      artDeliverPendingExceptionFromCode  @ (Thread*)
-END art_jni_lock_object
+END art_jni_lock_object_no_inline
 
     /*
      * Entry from JNI stub that tries to unlock the object in a fast path and calls
@@ -329,11 +344,25 @@
      *     Callee-save registers have been saved and can be used as temporaries.
      *     Return registers r0-r1 and s0-s1 need to be preserved.
      */
-    .extern artJniLockObject
 ENTRY art_jni_unlock_object
+    // Note: the slow path is actually the art_jni_unlock_object_no_inline (tail call).
     UNLOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Lunlock_object_jni_slow, /*can_be_null*/ 0
+END art_jni_unlock_object
 
- .Lunlock_object_jni_slow:
+    /*
+     * Entry from JNI stub that calls `artJniUnlockObject()`. Note that failure to
+     * unlock is fatal, so we do not need to check for exceptions.
+     * Custom calling convention:
+     *     r4 holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     Return registers r0-r1 and s0-s1 need to be preserved.
+     */
+    .extern artJniUnlockObject
+ENTRY art_jni_unlock_object_no_inline
+    // This is also the slow path for art_jni_unlock_object.
+    // Note that we need a local label as the assembler emits bad instructions
+    // for CBZ/CBNZ if we try to jump to `art_jni_unlock_object_no_inline`.
+.Lunlock_object_jni_slow:
     // Save GPR return registers and return address. Also save r4 for stack alignment.
     push   {r0-r1, r4, lr}
     .cfi_adjust_cfa_offset 16
@@ -350,4 +379,4 @@
     .cfi_adjust_cfa_offset -8
     // Restore GPR return registers and r4 and return.
     pop    {r0-r1, r4, pc}
-END art_jni_unlock_object
+END art_jni_unlock_object_no_inline
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bc6902d..6145d9a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -509,9 +509,9 @@
      */
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object_no_inline
-    // This is also the slow path for art_quick_lock_object. Note that we
-    // need a local label, the assembler complains about target being out of
-    // range if we try to jump to `art_quick_lock_object_no_inline`.
+    // This is also the slow path for art_quick_lock_object.
+    // Note that we need a local label as the assembler emits bad instructions
+    // for CBZ/CBNZ if we try to jump to `art_quick_lock_object_no_inline`.
 .Llock_object_slow:
     SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
     mov    r1, rSELF                  @ pass Thread::Current
@@ -539,9 +539,9 @@
      */
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object_no_inline
-    // This is also the slow path for art_quick_unlock_object. Note that we
-    // need a local label, the assembler complains about target being out of
-    // range if we try to jump to `art_quick_unlock_object_no_inline`.
+    // This is also the slow path for art_quick_unlock_object.
+    // Note that we need a local label as the assembler emits bad instructions
+    // for CBZ/CBNZ if we try to jump to `art_quick_unlock_object_no_inline`.
 .Lunlock_object_slow:
     @ save callee saves in case exception allocation triggers GC
     SETUP_SAVE_REFS_ONLY_FRAME r1
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index 701ce2e..36fca55 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -341,11 +341,22 @@
      *     Callee-save registers have been saved and can be used as temporaries.
      *     All argument registers need to be preserved.
      */
-    .extern artLockObjectFromCode
 ENTRY art_jni_lock_object
-    LOCK_OBJECT_FAST_PATH x15, .Llock_object_jni_slow, /*can_be_null*/ 0
+    LOCK_OBJECT_FAST_PATH x15, art_jni_lock_object_no_inline, /*can_be_null*/ 0
+END art_jni_lock_object
 
-.Llock_object_jni_slow:
+    /*
+     * Entry from JNI stub that calls `artLockObjectFromCode()`
+     * (the same as for managed code), may block for GC.
+     * Custom calling convention:
+     *     x15 holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     All argument registers need to be preserved.
+     */
+    .extern artLockObjectFromCode
+ENTRY art_jni_lock_object_no_inline
+    // This is also the slow path for art_jni_lock_object.
+    // Save args and LR.
     SAVE_ALL_ARGS_INCREASE_FRAME /*padding*/ 8 + /*LR*/ 8
     str    lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)]
     .cfi_rel_offset lr, ALL_ARGS_SIZE + /*padding*/ 8
@@ -369,7 +380,7 @@
     // Rely on the JNI transition frame constructed in the JNI stub.
     mov    x0, xSELF                           // Pass Thread::Current().
     b      artDeliverPendingExceptionFromCode  // (Thread*)
-END art_jni_lock_object
+END art_jni_lock_object_no_inline
 
     /*
      * Entry from JNI stub that tries to unlock the object in a fast path and calls
@@ -380,11 +391,21 @@
      *     Callee-save registers have been saved and can be used as temporaries.
      *     Return registers r0 and d0 need to be preserved.
      */
-    .extern artJniUnlockObject
 ENTRY art_jni_unlock_object
-    UNLOCK_OBJECT_FAST_PATH x15, .Lunlock_object_jni_slow, /*can_be_null*/ 0
+    UNLOCK_OBJECT_FAST_PATH x15, art_jni_unlock_object_no_inline, /*can_be_null*/ 0
+END art_jni_unlock_object
 
- .Lunlock_object_jni_slow:
+    /*
+     * Entry from JNI stub that calls `artJniUnlockObject()`. Note that failure to
+     * unlock is fatal, so we do not need to check for exceptions.
+     * Custom calling convention:
+     *     x15 holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     Return registers x0 and d0 need to be preserved.
+     */
+    .extern artJniUnlockObject
+ENTRY art_jni_unlock_object_no_inline
+    // This is also the slow path for art_jni_unlock_object.
     // Save return registers and return address.
     stp    x0, lr, [sp, #-32]!
     .cfi_adjust_cfa_offset 32
@@ -400,4 +421,4 @@
     .cfi_adjust_cfa_offset -32
     .cfi_restore lr
     ret
-END art_jni_unlock_object
+END art_jni_unlock_object_no_inline
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index 049a0bd..36d4c36 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -22,12 +22,12 @@
 // With `call_args_space = 0`, the ESP shall be 8-byte aligned but not 16-byte aligned,
 // so either the `call_args_space` should be 8 (or 24, 40, ...) or the user of the macro
 // needs to adjust the ESP explicitly afterwards.
-MACRO2(SAVE_MANAGED_ARGS_INCREASE_FRAME, eax_value, call_args_space)
+MACRO1(SAVE_MANAGED_ARGS_INCREASE_FRAME, call_args_space)
     // Return address is on the stack.
     PUSH_ARG ebx
     PUSH_ARG edx
     PUSH_ARG ecx
-    PUSH_ARG \eax_value
+    PUSH_ARG eax
     // Make xmm<n> spill slots 8-byte aligned.
     INCREASE_FRAME (\call_args_space + /*FPRs*/ 4 * 8 + /*padding*/ 4)
     movsd %xmm0, \call_args_space + 0(%esp)
@@ -242,7 +242,7 @@
 DEFINE_FUNCTION art_jni_read_barrier
     // Note: Managed callee-save registers have been saved by the JNI stub.
     // Save register args EAX, ECX, EDX, EBX, mmx0-mmx3, add argument space and padding.
-    SAVE_MANAGED_ARGS_INCREASE_FRAME eax, /*argument*/ 4 + /*padding*/ 4
+    SAVE_MANAGED_ARGS_INCREASE_FRAME /*argument*/ 4 + /*padding*/ 4
     // Pass the method argument.
     movl %eax, (%esp);
     call SYMBOL(artJniReadBarrier)  // (ArtMethod*)
@@ -265,8 +265,22 @@
-    LOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi .Llock_object_jni_slow
+    LOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Llock_object_jni_slow
 
 .Llock_object_jni_slow:
+    movl %edi, %eax                       // Restore EAX.
+    jmp  SYMBOL(art_jni_lock_object_no_inline)
+END_FUNCTION art_jni_lock_object
+
+    /*
+     * Entry from JNI stub that calls `artLockObjectFromCode()`
+     * (the same as for managed code), may block for GC.
+     * Custom calling convention:
+     *     EBP holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries (except EBP).
+     *     All argument registers need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_lock_object_no_inline
+    // This is also the slow path for art_jni_lock_object.
-    // Save register args EAX, ECX, EDX, EBX, mmx0-mmx3; original value of EAX is in EDI.
+    // Save register args EAX, ECX, EDX, EBX, mmx0-mmx3; EAX already holds its original value.
-    SAVE_MANAGED_ARGS_INCREASE_FRAME edi, /*call_args_space*/ 0
+    SAVE_MANAGED_ARGS_INCREASE_FRAME /*call_args_space*/ 0
     // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call.
     // Call `artLockObjectFromCode()`
     pushl %fs:THREAD_SELF_OFFSET          // Pass Thread::Current().
@@ -289,7 +303,7 @@
     CFI_ADJUST_CFA_OFFSET(4)
     call SYMBOL(artDeliverPendingExceptionFromCode)  // (Thread*)
     UNREACHABLE
-END_FUNCTION art_jni_lock_object
+END_FUNCTION art_jni_lock_object_no_inline
 
     /*
      * Entry from JNI stub that tries to unlock the object in a fast path and calls
@@ -305,6 +319,23 @@
     UNLOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Lunlock_object_jni_slow
 
  .Lunlock_object_jni_slow:
+    movl %edi, %eax                       // Restore EAX.
+    jmp  SYMBOL(art_jni_unlock_object_no_inline)
+END_FUNCTION art_jni_unlock_object
+
+    /*
+     * Entry from JNI stub that calls `artJniUnlockObject()`. Note that failure to
+     * unlock is fatal, so we do not need to check for exceptions.
+     * Custom calling convention:
+     *     EBP holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries (except EBP).
+     *     Return registers EAX, EDX and mmx0 need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_unlock_object_no_inline
+    // This is also the slow path for art_jni_unlock_object.
+    // On direct entry from the JNI stub EDI is an unset temporary, so copy the
+    // return value from EAX into EDI before the save sequence below pushes EDI.
+    movl %eax, %edi
     // Save return registers.
     PUSH_ARG edx
     PUSH_ARG edi  // Original contents of EAX.
@@ -322,4 +353,4 @@
     POP_ARG eax
     POP_ARG edx
     ret
-END_FUNCTION art_jni_unlock_object
+END_FUNCTION art_jni_unlock_object_no_inline
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index dca4128..3cef2b0 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -377,9 +377,19 @@
      *     All argument registers need to be preserved.
      */
 DEFINE_FUNCTION art_jni_lock_object
-    LOCK_OBJECT_FAST_PATH rbx, ebp, .Llock_object_jni_slow
+    LOCK_OBJECT_FAST_PATH rbx, ebp, art_jni_lock_object_no_inline
+END_FUNCTION art_jni_lock_object
 
-.Llock_object_jni_slow:
+    /*
+     * Entry from JNI stub that calls `artLockObjectFromCode()`
+     * (the same as for managed code), may block for GC.
+     * Custom calling convention:
+     *     RBX holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
+     *     All argument registers need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_lock_object_no_inline
+    // This is also the slow path for art_jni_lock_object.
     // Save register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and align stack.
     SAVE_MANAGED_ARGS_INCREASE_FRAME
     // Call `artLockObjectFromCode()`
@@ -399,7 +409,7 @@
     // Rely on the JNI transition frame constructed in the JNI stub.
     movq %gs:THREAD_SELF_OFFSET, %rdi     // Pass Thread::Current().
     jmp  SYMBOL(artDeliverPendingExceptionFromCode)  // (Thread*); tail call.
-END_FUNCTION art_jni_lock_object
+END_FUNCTION art_jni_lock_object_no_inline
 
     /*
      * Entry from JNI stub that tries to unlock the object in a fast path and calls
@@ -415,8 +425,22 @@
     UNLOCK_OBJECT_FAST_PATH rbx, ebp, /*saved_rax*/ r12, .Lunlock_object_jni_slow
 
  .Lunlock_object_jni_slow:
+    movq %r12, %rax                       // Restore RAX.
+    jmp  SYMBOL(art_jni_unlock_object_no_inline)
+END_FUNCTION art_jni_unlock_object
+
+    /*
+     * Entry from JNI stub that calls `artJniUnlockObject()`. Note that failure to
+     * unlock is fatal, so we do not need to check for exceptions.
+     * Custom calling convention:
+     *     RBX holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
+     *     Return registers RAX and mmx0 need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_unlock_object_no_inline
+    // This is also the slow path for art_jni_unlock_object.
     // Save return registers and return address.
-    PUSH_ARG r12  // Original contents of RAX.
+    PUSH_ARG rax
     INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
     movsd %xmm0, 0(%rsp)
     // Call `artJniUnlockObject()`.
@@ -428,4 +452,4 @@
     DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
     POP_ARG rax
     ret
-END_FUNCTION art_jni_unlock_object
+END_FUNCTION art_jni_unlock_object_no_inline
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index 44127e6..32c1e91 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -119,7 +119,9 @@
 
 // JNI lock/unlock entrypoints. Note: Custom calling convention.
 extern "C" void art_jni_lock_object(art::mirror::Object*);
+extern "C" void art_jni_lock_object_no_inline(art::mirror::Object*);
 extern "C" void art_jni_unlock_object(art::mirror::Object*);
+extern "C" void art_jni_unlock_object_no_inline(art::mirror::Object*);
 
 // Polymorphic invoke entrypoints.
 extern "C" void art_quick_invoke_polymorphic(uint32_t, void*);
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 1348241..d2d35f6 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -78,15 +78,17 @@
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
   qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
   qpoints->pJniDecodeReferenceResult = JniDecodeReferenceResult;
-  qpoints->pJniLockObject = art_jni_lock_object;
-  qpoints->pJniUnlockObject = art_jni_unlock_object;
   qpoints->pJniReadBarrier = art_jni_read_barrier;
 
   // Locks
   if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
+    qpoints->pJniLockObject = art_jni_lock_object_no_inline;
+    qpoints->pJniUnlockObject = art_jni_unlock_object_no_inline;
     qpoints->pLockObject = art_quick_lock_object_no_inline;
     qpoints->pUnlockObject = art_quick_unlock_object_no_inline;
   } else {
+    qpoints->pJniLockObject = art_jni_lock_object;
+    qpoints->pJniUnlockObject = art_jni_unlock_object;
     qpoints->pLockObject = art_quick_lock_object;
     qpoints->pUnlockObject = art_quick_unlock_object;
   }