Merge "Refactor GetIMTIndex"
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d776fb4..c8d6ddc 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1745,6 +1745,7 @@
 
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1770,29 +1771,57 @@
   Register dstBegin = XRegisterFrom(locations->InAt(4));
 
   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
-  Register src_ptr_end = XRegisterFrom(locations->GetTemp(1));
+  Register num_chr = XRegisterFrom(locations->GetTemp(1));
+  Register tmp1 = XRegisterFrom(locations->GetTemp(2));
 
   UseScratchRegisterScope temps(masm);
   Register dst_ptr = temps.AcquireX();
-  Register tmp = temps.AcquireW();
+  Register tmp2 = temps.AcquireX();
 
-  // src range to copy.
+  // src address to copy from.
   __ Add(src_ptr, srcObj, Operand(value_offset));
-  __ Add(src_ptr_end, src_ptr, Operand(srcEnd, LSL, 1));
   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
 
-  // dst to be copied.
+  // dst address start to copy to.
   __ Add(dst_ptr, dstObj, Operand(data_offset));
   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
 
+  __ Sub(num_chr, srcEnd, srcBegin);
+
   // Do the copy.
-  vixl::Label loop, done;
+  vixl::Label loop;
+  vixl::Label done;
+  vixl::Label remainder;
+
+  // Early out for valid zero-length retrievals.
+  __ Cbz(num_chr, &done);
+
+  // Save repairing the value of num_chr on the < 8 character path.
+  __ Subs(tmp1, num_chr, 8);
+  __ B(lt, &remainder);
+
+  // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
+  __ Mov(num_chr, tmp1);
+
+  // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
+  // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
   __ Bind(&loop);
-  __ Cmp(src_ptr, src_ptr_end);
-  __ B(&done, eq);
-  __ Ldrh(tmp, MemOperand(src_ptr, char_size, vixl::PostIndex));
-  __ Strh(tmp, MemOperand(dst_ptr, char_size, vixl::PostIndex));
-  __ B(&loop);
+  __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, vixl::PostIndex));
+  __ Subs(num_chr, num_chr, 8);
+  __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, vixl::PostIndex));
+  __ B(ge, &loop);
+
+  __ Adds(num_chr, num_chr, 8);
+  __ B(eq, &done);
+
+  // Main loop for < 8 character case and remainder handling. Loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  __ Ldrh(tmp1, MemOperand(src_ptr, char_size, vixl::PostIndex));
+  __ Subs(num_chr, num_chr, 1);
+  __ Strh(tmp1, MemOperand(dst_ptr, char_size, vixl::PostIndex));
+  __ B(gt, &remainder);
+
   __ Bind(&done);
 }
 
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index 44c7649..38ca76a 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -30,18 +30,17 @@
 .arch armv7-a
 .thumb
 
-// Macro to generate the value of Runtime::Current into rDest clobbering rTemp. As it uses labels
+// Macro to generate the value of Runtime::Current into rDest. As it uses labels
 // then the labels need to be unique. We bind these to the function name in the ENTRY macros.
-.macro RUNTIME_CURRENT name, num, rDest, rTemp
+.macro RUNTIME_CURRENT name, num, rDest
     .if .Lruntime_current\num\()_used
          .error
     .endif
     .set .Lruntime_current\num\()_used, 1
-    ldr \rDest, .Lgot_\name\()_\num               @ Load offset of the GOT.
-    ldr \rTemp, .Lruntime_instance_\name\()_\num  @ Load GOT offset of Runtime::instance_.
+    ldr \rDest, .Lruntime_instance_\name\()_\num  @ Load GOT_PREL offset of Runtime::instance_.
 .Lload_got_\name\()_\num\():
-    add \rDest, pc                                @ Fixup GOT address.
-    ldr \rDest, [\rDest, \rTemp]                  @ Load address of Runtime::instance_.
+    add \rDest, pc                                @ Fixup GOT_PREL address.
+    ldr \rDest, [\rDest]                          @ Load address of Runtime::instance_.
     ldr \rDest, [\rDest]                          @ Load Runtime::instance_.
 .endm
 
@@ -90,26 +89,20 @@
     DEF_ENTRY .arm, \name
 .endm
 
-// Terminate an ENTRY and generate GOT references.
+// Terminate an ENTRY and generate GOT_PREL references.
 .macro END name
      // Generate offsets of GOT and Runtime::instance_ used in RUNTIME_CURRENT.
      .if .Lruntime_current1_used
-         .Lgot_\name\()_1:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_1+4)
          .Lruntime_instance_\name\()_1:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_1+4)
      .endif
      .if .Lruntime_current2_used
-         .Lgot_\name\()_2:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_2+4)
          .Lruntime_instance_\name\()_2:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_2+4)
     .endif
      .if .Lruntime_current3_used
-         .Lgot_\name\()_3:
-             .word   _GLOBAL_OFFSET_TABLE_-(.Lload_got_\name\()_3+4)
          .Lruntime_instance_\name\()_3:
-             .word   _ZN3art7Runtime9instance_E(GOT)
+             .word   _ZN3art7Runtime9instance_E(GOT_PREL)-(.Lload_got_\name\()_3+4)
     .endif
     // Remove the RUNTIME_CURRENTx macros so they get rebound in the next function entry.
     .purgem RUNTIME_CURRENT1
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index ffac030..c3a5829 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -206,6 +206,7 @@
   struct sigaction sa, osa;
   sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
   sa.sa_sigaction = bad_divide_inst_handle;
+  sigemptyset(&sa.sa_mask);
   sigaction(SIGILL, &sa, &osa);
 
   bool has_div = false;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5209bb6..dd7063f 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -44,15 +44,15 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      */
-.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp
     SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
     vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
     .cfi_adjust_cfa_offset 64
     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     .cfi_adjust_cfa_offset 12
-    RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kSaveAll Method*.
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp is kSaveAll Method*.
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
      // Ugly compile-time check, but we only have the preprocessor.
@@ -65,7 +65,7 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsOnly).
      */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME rTemp
     push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
     .cfi_adjust_cfa_offset 28
     .cfi_rel_offset r5, 0
@@ -77,9 +77,9 @@
     .cfi_rel_offset lr, 24
     sub sp, #4                                    @ bottom word will hold Method*
     .cfi_adjust_cfa_offset 4
-    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp is kRefsOnly Method*.
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
     // Ugly compile-time check, but we only have the preprocessor.
@@ -88,30 +88,6 @@
 #endif
 .endm
 
-    /*
-     * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
-     * and preserves the value of rTemp2 at entry.
-     */
-.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2 rTemp1, rTemp2
-    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
-    .cfi_adjust_cfa_offset 28
-    .cfi_rel_offset r5, 0
-    .cfi_rel_offset r6, 4
-    .cfi_rel_offset r7, 8
-    .cfi_rel_offset r8, 12
-    .cfi_rel_offset r10, 16
-    .cfi_rel_offset r11, 20
-    .cfi_rel_offset lr, 24
-    sub sp, #4                                    @ bottom word will hold Method*
-    .cfi_adjust_cfa_offset 4
-    str \rTemp2, [sp, #0]                         @ save rTemp2
-    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
-    ldr \rTemp2, [sp, #0]                         @ restore rTemp2
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
-    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
-
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
@@ -164,12 +140,12 @@
 #endif
 .endm
 
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
-    RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
-     @ rTemp1 is kRefsAndArgs Method*.
-    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
-    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
+     @ rTemp is kRefsAndArgs Method*.
+    ldr \rTemp, [\rTemp, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
+    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
@@ -217,7 +193,7 @@
 .macro DELIVER_PENDING_EXCEPTION
     .fnend
     .fnstart
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1    @ save callee saves for throw
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0        @ save callee saves for throw
     mov    r0, r9                              @ pass Thread::Current
     b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
 .endm
@@ -225,7 +201,7 @@
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r0, r1 // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0        @ save all registers as basis for long jump context
     mov r0, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -234,7 +210,7 @@
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r1, r2  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r1        @ save all registers as basis for long jump context
     mov r1, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -243,7 +219,7 @@
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  r2, r3  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   \cxx_name                   @ \cxx_name(Thread*)
 END \c_name
@@ -275,7 +251,7 @@
 .macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1 @ save callee saves in case of GC
     ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (uint32_t field_idx, const Method* referrer, Thread*)
@@ -287,7 +263,7 @@
 .macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2 @ save callee saves in case of GC
     ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     \entrypoint                   @ (field_idx, Object*, referrer, Thread*)
@@ -299,7 +275,7 @@
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3 @ save callee saves in case of GC
     ldr    r3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
@@ -360,7 +336,7 @@
      */
 .macro INVOKE_TRAMPOLINE_BODY cxx_name
     .extern \cxx_name
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case allocation triggers GC
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2  @ save callee saves in case allocation triggers GC
     mov    r2, r9                         @ pass Thread::Current
     mov    r3, sp
     bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
@@ -566,7 +542,7 @@
 .Llock_strex_fail:
     b      .Lretry_lock               @ retry
 .Lslow_lock:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1  @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     bl     artLockObjectFromCode      @ (Object* obj, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -575,7 +551,7 @@
 END art_quick_lock_object
 
 ENTRY art_quick_lock_object_no_inline
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1  @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     bl     artLockObjectFromCode      @ (Object* obj, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -632,7 +608,7 @@
     b      .Lretry_unlock             @ retry
 .Lslow_unlock:
     @ save callee saves in case exception allocation triggers GC
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1
     mov    r1, r9                     @ pass Thread::Current
     bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -642,7 +618,7 @@
 
 ENTRY art_quick_unlock_object_no_inline
     @ save callee saves in case exception allocation triggers GC
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1
     mov    r1, r9                     @ pass Thread::Current
     bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -677,7 +653,7 @@
     .cfi_restore r0
     .cfi_restore r1
     .cfi_restore lr
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2, r3  // save all registers as basis for long jump context
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     b   artThrowClassCastException  @ (Class*, Class*, Thread*)
     bkpt
@@ -813,7 +789,7 @@
 .Lthrow_array_store_exception:
     pop {r0-r2, lr}
     /* No need to repeat restore cfi directives, the ones above apply here. */
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r3, ip
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r3
     mov r1, r2
     mov r2, r9                     @ pass Thread::Current
     b artThrowArrayStoreException  @ (Class*, Class*, Thread*)
@@ -824,7 +800,7 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r1, r2  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1  @ save callee saves in case of GC
     mov    r1, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -836,7 +812,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2  @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -848,7 +824,7 @@
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3  @ save callee saves in case of GC
     mov    r3, r9                     @ pass Thread::Current
     @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
     bl     \entrypoint
@@ -861,7 +837,7 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2  r12, r3  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12 @ save callee saves in case of GC
     str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint
@@ -890,7 +866,7 @@
      */
     .extern artGet64StaticFromCode
 ENTRY art_quick_get64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2 @ save callee saves in case of GC
     ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r2, r9                        @ pass Thread::Current
     bl     artGet64StaticFromCode        @ (uint32_t field_idx, const Method* referrer, Thread*)
@@ -916,7 +892,7 @@
      */
     .extern artGet64InstanceFromCode
 ENTRY art_quick_get64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2 @ save callee saves in case of GC
     ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     mov    r3, r9                        @ pass Thread::Current
     bl     artGet64InstanceFromCode      @ (field_idx, Object*, referrer, Thread*)
@@ -941,7 +917,7 @@
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r12   @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1 @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
     ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
@@ -966,7 +942,7 @@
      */
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12, lr  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12 @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
     ldr    r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     str    r9, [sp, #-12]!               @ expand the frame and pass Thread::Current
@@ -1087,7 +1063,7 @@
     bx     lr
 
 .Lart_quick_alloc_object_rosalloc_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2  @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1172,7 +1148,7 @@
     ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2                      // Save callee saves in case of GC.
     mov    r2, r9                                             // Pass Thread::Current.
     bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1207,7 +1183,7 @@
     pop    {r0, r1, r3, lr}
     b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_object_region_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2                      // Save callee saves in case of GC.
     mov    r2, r9                                             // Pass Thread::Current.
     bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1227,7 +1203,7 @@
 1:
 #endif
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for GC stack crawl
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1      @ save callee saves for GC stack crawl
     @ TODO: save FPRs to enable access in the debugger?
     bl     artTestSuspendFromCode             @ (Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
@@ -1235,7 +1211,7 @@
 
 ENTRY art_quick_implicit_suspend
     mov    r0, rSELF
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for stack crawl
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1      @ save callee saves for stack crawl
     bl     artTestSuspendFromCode             @ (Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_implicit_suspend
@@ -1298,7 +1274,7 @@
 
     .extern artQuickResolutionTrampoline
 ENTRY art_quick_resolution_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2
     mov     r2, r9                 @ pass Thread::Current
     mov     r3, sp                 @ pass SP
     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
@@ -1403,7 +1379,7 @@
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r1, r2
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r1
     mov     r1, r9                 @ pass Thread::Current
     mov     r2, sp                 @ pass SP
     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
@@ -1426,7 +1402,7 @@
     .extern artInstrumentationMethodExitFromCode
 ENTRY art_quick_instrumentation_entry
     @ Make stack crawlable and clobber r2 and r3 (post saving)
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2
     @ preserve r0 (not normally an arg) knowing there is a spare slot in kRefsAndArgs.
     str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
@@ -1441,7 +1417,7 @@
     .global art_quick_instrumentation_exit
 art_quick_instrumentation_exit:
     mov   lr, #0         @ link register is to here, so clobber with 0 for later checks
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ set up frame knowing r2 and r3 must be dead on exit
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2  @ set up frame knowing r2 and r3 must be dead on exit
     mov   r12, sp        @ remember bottom of caller's frame
     push  {r0-r1}        @ save return value
     .cfi_adjust_cfa_offset 8
@@ -1480,7 +1456,7 @@
      */
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0
     mov    r0, r9         @ Set up args.
     blx    artDeoptimize  @ artDeoptimize(Thread*)
 END art_quick_deoptimize
@@ -1491,7 +1467,7 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0
     mov    r0, r9                         @ Set up args.
     blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
 END art_quick_deoptimize_from_compiled_code
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 98d3345..a102858 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -33,9 +33,10 @@
 
 namespace art {
 
+template<ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ArtField::GetDeclaringClass() {
   GcRootSource gc_root_source(this);
-  mirror::Class* result = declaring_class_.Read(&gc_root_source);
+  mirror::Class* result = declaring_class_.Read<kReadBarrierOption>(&gc_root_source);
   DCHECK(result != nullptr);
   DCHECK(result->IsLoaded() || result->IsErroneous()) << result->GetStatus();
   return result;
diff --git a/runtime/art_field.h b/runtime/art_field.h
index b64b70f..aaccbf3 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -41,6 +41,7 @@
  public:
   ArtField();
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Class* GetDeclaringClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetDeclaringClass(mirror::Class *new_declaring_class)
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 9f63bca..32ae6ff 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -399,8 +399,9 @@
   return GetDeclaringClass()->GetDexCache();
 }
 
+template<ReadBarrierOption kReadBarrierOption>
 inline bool ArtMethod::IsProxyMethod() {
-  return GetDeclaringClass()->IsProxyClass();
+  return GetDeclaringClass<kReadBarrierOption>()->IsProxyClass();
 }
 
 inline ArtMethod* ArtMethod::GetInterfaceMethodIfProxy(size_t pointer_size) {
@@ -442,24 +443,24 @@
   return type;
 }
 
-template<typename RootVisitorType>
+template<ReadBarrierOption kReadBarrierOption, typename RootVisitorType>
 void ArtMethod::VisitRoots(RootVisitorType& visitor, size_t pointer_size) {
-  ArtMethod* interface_method = nullptr;
-  mirror::Class* klass = declaring_class_.Read();
-  if (LIKELY(klass != nullptr)) {
+  if (LIKELY(!declaring_class_.IsNull())) {
+    visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
+    mirror::Class* klass = declaring_class_.Read<kReadBarrierOption>();
     if (UNLIKELY(klass->IsProxyClass())) {
       // For normal methods, dex cache shortcuts will be visited through the declaring class.
       // However, for proxies we need to keep the interface method alive, so we visit its roots.
-      interface_method = mirror::DexCache::GetElementPtrSize(
+      ArtMethod* interface_method = mirror::DexCache::GetElementPtrSize(
           GetDexCacheResolvedMethods(pointer_size),
           GetDexMethodIndex(),
           pointer_size);
       DCHECK(interface_method != nullptr);
       DCHECK_EQ(interface_method,
-                Runtime::Current()->GetClassLinker()->FindMethodForProxy(klass, this));
+                Runtime::Current()->GetClassLinker()->FindMethodForProxy<kReadBarrierOption>(
+                    klass, this));
       interface_method->VisitRoots(visitor, pointer_size);
     }
-    visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
     // We know we don't have profiling information if the class hasn't been verified. Note
     // that this check also ensures the IsNative call can be made, as IsNative expects a fully
     // created class (and not a retired one).
@@ -467,7 +468,7 @@
       // Runtime methods and native methods use the same field as the profiling info for
       // storing their own data (jni entrypoint for native methods, and ImtConflictTable for
       // some runtime methods).
-      if (!IsNative() && !IsRuntimeMethod()) {
+      if (!IsNative<kReadBarrierOption>() && !IsRuntimeMethod()) {
         ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
         if (profiling_info != nullptr) {
           profiling_info->VisitRoots(visitor);
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 2c40d6d..849af97 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -356,6 +356,7 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool SkipAccessChecks() {
@@ -582,7 +583,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
-  template<typename RootVisitorType>
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename RootVisitorType>
   void VisitRoots(RootVisitorType& visitor, size_t pointer_size) NO_THREAD_SAFETY_ANALYSIS;
 
   const DexFile* GetDexFile() SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 8eb3742..480644a 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -128,7 +128,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 168 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 166 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_pos.
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index f3e260b..f2575f7 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -225,6 +225,34 @@
   return klass;
 }
 
+template<ReadBarrierOption kReadBarrierOption>
+ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
+  DCHECK(proxy_class->IsProxyClass());
+  DCHECK(proxy_method->IsProxyMethod<kReadBarrierOption>());
+  {
+    Thread* const self = Thread::Current();
+    ReaderMutexLock mu(self, dex_lock_);
+    // Locate the dex cache of the original interface/Object
+    for (const DexCacheData& data : dex_caches_) {
+      if (!self->IsJWeakCleared(data.weak_root) &&
+          proxy_method->HasSameDexCacheResolvedTypes(data.resolved_types,
+                                                     image_pointer_size_)) {
+        mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+            self->DecodeJObject(data.weak_root));
+        if (dex_cache != nullptr) {
+          ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
+              proxy_method->GetDexMethodIndex(), image_pointer_size_);
+          CHECK(resolved_method != nullptr);
+          return resolved_method;
+        }
+      }
+    }
+  }
+  LOG(FATAL) << "Didn't find dex cache for " << PrettyClass(proxy_class) << " "
+      << PrettyMethod(proxy_method);
+  UNREACHABLE();
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CLASS_LINKER_INL_H_
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 4259aea..7c00315 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4277,33 +4277,6 @@
   return DotToDescriptor(name->ToModifiedUtf8().c_str());
 }
 
-ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
-  DCHECK(proxy_class->IsProxyClass());
-  DCHECK(proxy_method->IsProxyMethod());
-  {
-    Thread* const self = Thread::Current();
-    ReaderMutexLock mu(self, dex_lock_);
-    // Locate the dex cache of the original interface/Object
-    for (const DexCacheData& data : dex_caches_) {
-      if (!self->IsJWeakCleared(data.weak_root) &&
-          proxy_method->HasSameDexCacheResolvedTypes(data.resolved_types,
-                                                     image_pointer_size_)) {
-        mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
-            self->DecodeJObject(data.weak_root));
-        if (dex_cache != nullptr) {
-          ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
-              proxy_method->GetDexMethodIndex(), image_pointer_size_);
-          CHECK(resolved_method != nullptr);
-          return resolved_method;
-        }
-      }
-    }
-  }
-  LOG(FATAL) << "Didn't find dex cache for " << PrettyClass(proxy_class) << " "
-      << PrettyMethod(proxy_method);
-  UNREACHABLE();
-}
-
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
   CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 18u);
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 4832d32..d6822c5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -483,6 +483,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ArtMethod* FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index f58af5a..5bdb36c 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -418,26 +418,6 @@
   (*icu_cleanup_fn)();
 
   Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
-
-  // Manually closing the JNI libraries.
-  // Runtime does not support repeatedly doing JNI->CreateVM, thus we need to manually clean up the
-  // dynamic linking loader so that gtests would not fail.
-  // Bug: 25785594
-  if (runtime_->IsStarted()) {
-    {
-      // We retrieve the handle by calling dlopen on the library. To close it, we need to call
-      // dlclose twice, the first time to undo our dlopen and the second time to actually unload it.
-      // See man dlopen.
-      void* handle = dlopen("libjavacore.so", RTLD_LAZY);
-      dlclose(handle);
-      CHECK_EQ(0, dlclose(handle));
-    }
-    {
-      void* handle = dlopen("libopenjdkd.so", RTLD_LAZY);
-      dlclose(handle);
-      CHECK_EQ(0, dlclose(handle));
-    }
-  }
 }
 
 static std::string GetDexFileName(const std::string& jar_prefix, bool host) {
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c621672..91deea0 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -112,10 +112,12 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_functions,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_function,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, jni_entrypoints,
-                        sizeof(void*) * 6);
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspend_barriers,
+                        sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, jni_entrypoints,
+                        sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
 
     // Skip across the entrypoints structures.
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 894ceba..ac5931f 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -444,27 +444,8 @@
       }
       PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
       MemMap::DumpMaps(LOG(INTERNAL_FATAL), true);
-      {
-        LOG(INTERNAL_FATAL) << "Attempting see if it's a bad root";
-        Thread* self = Thread::Current();
-        if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-          mark_sweep_->VerifyRoots();
-        } else {
-          const bool heap_bitmap_exclusive_locked =
-              Locks::heap_bitmap_lock_->IsExclusiveHeld(self);
-          if (heap_bitmap_exclusive_locked) {
-            Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
-          }
-          {
-            ScopedThreadSuspension(self, kSuspended);
-            ScopedSuspendAll ssa(__FUNCTION__);
-            mark_sweep_->VerifyRoots();
-          }
-          if (heap_bitmap_exclusive_locked) {
-            Locks::heap_bitmap_lock_->ExclusiveLock(self);
-          }
-        }
-      }
+      LOG(INTERNAL_FATAL) << "Attempting see if it's a bad thread root\n";
+      mark_sweep_->VerifySuspendedThreadRoots();
       LOG(FATAL) << "Can't mark invalid object";
     }
   }
@@ -591,15 +572,15 @@
     if (heap->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
       space::LargeObjectSpace* large_object_space = heap->GetLargeObjectsSpace();
       if (large_object_space != nullptr && !large_object_space->Contains(root)) {
-        LOG(INTERNAL_FATAL) << "Found invalid root: " << root << " " << info;
+        LOG(INTERNAL_FATAL) << "Found invalid root: " << root << " " << info << "\n";
       }
     }
   }
 };
 
-void MarkSweep::VerifyRoots() {
+void MarkSweep::VerifySuspendedThreadRoots() {
   VerifyRootVisitor visitor;
-  Runtime::Current()->GetThreadList()->VisitRoots(&visitor);
+  Runtime::Current()->GetThreadList()->VisitRootsForSuspendedThreads(&visitor);
 }
 
 void MarkSweep::MarkRoots(Thread* self) {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index c19107a..7168f96 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -250,8 +250,8 @@
 
   // Verify the roots of the heap and print out information related to any invalid roots.
   // Called in MarkObject, so may we may not hold the mutator lock.
-  void VerifyRoots()
-      NO_THREAD_SAFETY_ANALYSIS;
+  void VerifySuspendedThreadRoots()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Expand mark stack to 2x its current size.
   void ExpandMarkStack()
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1ebe5cc..6a61d78 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1178,8 +1178,9 @@
       return nullptr;
     }
   }
-  ImageHeader temp_image_header;
-  ImageHeader* image_header = &temp_image_header;
+  // unique_ptr to reduce frame size.
+  std::unique_ptr<ImageHeader> temp_image_header(new ImageHeader);
+  ImageHeader* image_header = temp_image_header.get();
   {
     TimingLogger::ScopedTiming timing("ReadImageHeader", &logger);
     bool success = file->ReadFully(image_header, sizeof(*image_header));
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index d983a9f..c216412 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -74,6 +74,10 @@
     if (self != nullptr) {
       self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_);
     }
+
+    if (!needs_native_bridge_) {
+      android::CloseNativeLibrary(handle_);
+    }
   }
 
   jweak GetClassLoader() const {
@@ -271,8 +275,7 @@
       REQUIRES(!Locks::jni_libraries_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
-    std::vector<JNI_OnUnloadFn> unload_functions;
+    std::vector<SharedLibrary*> unload_libraries;
     {
       MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_);
       for (auto it = libraries_.begin(); it != libraries_.end(); ) {
@@ -283,15 +286,7 @@
         // the native libraries of the boot class loader.
         if (class_loader != nullptr &&
             soa.Self()->IsJWeakCleared(class_loader)) {
-          void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
-          if (sym == nullptr) {
-            VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
-          } else {
-            VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]";
-            JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
-            unload_functions.push_back(jni_on_unload);
-          }
-          delete library;
+          unload_libraries.push_back(library);
           it = libraries_.erase(it);
         } else {
           ++it;
@@ -299,9 +294,17 @@
       }
     }
     // Do this without holding the jni libraries lock to prevent possible deadlocks.
-    for (JNI_OnUnloadFn fn : unload_functions) {
-      VLOG(jni) << "Calling JNI_OnUnload";
-      (*fn)(soa.Vm(), nullptr);
+    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
+    for (auto library : unload_libraries) {
+      void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
+      if (sym == nullptr) {
+        VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
+      } else {
+        VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]: Calling...";
+        JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
+        jni_on_unload(soa.Vm(), nullptr);
+      }
+      delete library;
     }
   }
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 0d95bb1..b783a01 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -521,7 +521,7 @@
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
   Class* super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
   return (super_class != nullptr)
-      ? MemberOffset(RoundUp(super_class->GetObjectSize(),
+      ? MemberOffset(RoundUp(super_class->GetObjectSize<kVerifyFlags, kReadBarrierOption>(),
                              sizeof(mirror::HeapReference<mirror::Object>)))
       : ClassOffset();
 }
@@ -766,7 +766,8 @@
   }
   if (kVisitNativeRoots) {
     // Since this class is reachable, we must also visit the associated roots when we scan it.
-    VisitNativeRoots(visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+    VisitNativeRoots<kReadBarrierOption>(
+        visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
   }
 }
 
@@ -905,24 +906,24 @@
   return GetFieldPtr<GcRoot<String>*>(DexCacheStringsOffset());
 }
 
-template<class Visitor>
+template<ReadBarrierOption kReadBarrierOption, class Visitor>
 void mirror::Class::VisitNativeRoots(Visitor& visitor, size_t pointer_size) {
   for (ArtField& field : GetSFieldsUnchecked()) {
     // Visit roots first in case the declaring class gets moved.
     field.VisitRoots(visitor);
     if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
     }
   }
   for (ArtField& field : GetIFieldsUnchecked()) {
     // Visit roots first in case the declaring class gets moved.
     field.VisitRoots(visitor);
     if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
     }
   }
   for (ArtMethod& method : GetMethods(pointer_size)) {
-    method.VisitRoots(visitor, pointer_size);
+    method.VisitRoots<kReadBarrierOption>(visitor, pointer_size);
   }
 }
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index b1abf94..9670acc 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1152,7 +1152,7 @@
 
   // Visit native roots visits roots which are keyed off the native pointers such as ArtFields and
   // ArtMethods.
-  template<class Visitor>
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, class Visitor>
   void VisitNativeRoots(Visitor& visitor, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/oat.h b/runtime/oat.h
index 57675dc..286394e 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '9', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '8', '0', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index f1f4a12..b9ee442 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -924,10 +924,22 @@
 
   Runtime* runtime = Runtime::Current();
   bool implicit_stack_check = !runtime->ExplicitStackOverflowChecks() && !runtime->IsAotCompiler();
+
+  // Valgrind on arm and arm64 doesn't give the right values here. Do not install the guard
+  // page, and effectively disable stack overflow checks (we'll get segfaults, potentially) by
+  // setting stack_begin to 0.
+  const bool valgrind_on_arm =
+      (kRuntimeISA == kArm || kRuntimeISA == kArm64) &&
+      kMemoryToolIsValgrind &&
+      RUNNING_ON_MEMORY_TOOL != 0;
+  if (valgrind_on_arm) {
+    tlsPtr_.stack_begin = nullptr;
+  }
+
   ResetDefaultStackEnd();
 
   // Install the protected region if we are doing implicit overflow checks.
-  if (implicit_stack_check) {
+  if (implicit_stack_check && !valgrind_on_arm) {
     // The thread might have protected region at the bottom.  We need
     // to install our own region so we need to move the limits
     // of the stack to make room for it.
@@ -1122,32 +1134,36 @@
 }
 
 void Thread::RunCheckpointFunction() {
-  Closure *checkpoints[kMaxCheckpoints];
-
-  // Grab the suspend_count lock and copy the current set of
-  // checkpoints.  Then clear the list and the flag.  The RequestCheckpoint
-  // function will also grab this lock so we prevent a race between setting
-  // the kCheckpointRequest flag and clearing it.
-  {
-    MutexLock mu(this, *Locks::thread_suspend_count_lock_);
-    for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-      checkpoints[i] = tlsPtr_.checkpoint_functions[i];
-      tlsPtr_.checkpoint_functions[i] = nullptr;
+  bool done = false;
+  do {
+    // Grab the suspend_count lock and copy the checkpoints one by one. When the last checkpoint is
+    // copied, clear the list and the flag. The RequestCheckpoint function will also grab this lock
+    // to prevent a race between setting the kCheckpointRequest flag and clearing it.
+    Closure* checkpoint = nullptr;
+    {
+      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+      if (tlsPtr_.checkpoint_function != nullptr) {
+        checkpoint = tlsPtr_.checkpoint_function;
+        if (!checkpoint_overflow_.empty()) {
+          // Overflow list not empty, copy the first one out and continue.
+          tlsPtr_.checkpoint_function = checkpoint_overflow_.front();
+          checkpoint_overflow_.pop_front();
+        } else {
+          // No overflow checkpoints, this means that we are on the last pending checkpoint.
+          tlsPtr_.checkpoint_function = nullptr;
+          AtomicClearFlag(kCheckpointRequest);
+          done = true;
+        }
+      } else {
+        LOG(FATAL) << "Checkpoint flag set without pending checkpoint";
+      }
     }
-    AtomicClearFlag(kCheckpointRequest);
-  }
 
-  // Outside the lock, run all the checkpoint functions that
-  // we collected.
-  bool found_checkpoint = false;
-  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-    if (checkpoints[i] != nullptr) {
-      ScopedTrace trace("Run checkpoint function");
-      checkpoints[i]->Run(this);
-      found_checkpoint = true;
-    }
-  }
-  CHECK(found_checkpoint);
+    // Outside the lock, run the checkpoint functions that we collected.
+    ScopedTrace trace("Run checkpoint function");
+    DCHECK(checkpoint != nullptr);
+    checkpoint->Run(this);
+  } while (!done);
 }
 
 bool Thread::RequestCheckpoint(Closure* function) {
@@ -1157,20 +1173,6 @@
     return false;  // Fail, thread is suspended and so can't run a checkpoint.
   }
 
-  uint32_t available_checkpoint = kMaxCheckpoints;
-  for (uint32_t i = 0 ; i < kMaxCheckpoints; ++i) {
-    if (tlsPtr_.checkpoint_functions[i] == nullptr) {
-      available_checkpoint = i;
-      break;
-    }
-  }
-  if (available_checkpoint == kMaxCheckpoints) {
-    // No checkpoint functions available, we can't run a checkpoint
-    return false;
-  }
-  tlsPtr_.checkpoint_functions[available_checkpoint] = function;
-
-  // Checkpoint function installed now install flag bit.
   // We must be runnable to request a checkpoint.
   DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
   union StateAndFlags new_state_and_flags;
@@ -1178,11 +1180,13 @@
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
   bool success = tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(
       old_state_and_flags.as_int, new_state_and_flags.as_int);
-  if (UNLIKELY(!success)) {
-    // The thread changed state before the checkpoint was installed.
-    CHECK_EQ(tlsPtr_.checkpoint_functions[available_checkpoint], function);
-    tlsPtr_.checkpoint_functions[available_checkpoint] = nullptr;
-  } else {
+  if (success) {
+    // Succeeded setting checkpoint flag, now insert the actual checkpoint.
+    if (tlsPtr_.checkpoint_function == nullptr) {
+      tlsPtr_.checkpoint_function = function;
+    } else {
+      checkpoint_overflow_.push_back(function);
+    }
     CHECK_EQ(ReadFlag(kCheckpointRequest), true);
     TriggerSuspend();
   }
@@ -1624,9 +1628,7 @@
   std::fill(tlsPtr_.rosalloc_runs,
             tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
             gc::allocator::RosAlloc::GetDedicatedFullRun());
-  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
-    tlsPtr_.checkpoint_functions[i] = nullptr;
-  }
+  tlsPtr_.checkpoint_function = nullptr;
   for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
     tlsPtr_.active_suspend_barriers[i] = nullptr;
   }
@@ -1767,9 +1769,8 @@
   }
   CHECK_NE(GetState(), kRunnable);
   CHECK_NE(ReadFlag(kCheckpointRequest), true);
-  CHECK(tlsPtr_.checkpoint_functions[0] == nullptr);
-  CHECK(tlsPtr_.checkpoint_functions[1] == nullptr);
-  CHECK(tlsPtr_.checkpoint_functions[2] == nullptr);
+  CHECK(tlsPtr_.checkpoint_function == nullptr);
+  CHECK_EQ(checkpoint_overflow_.size(), 0u);
   CHECK(tlsPtr_.flip_function == nullptr);
   CHECK_EQ(tls32_.suspended_at_suspend_check, false);
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 3c367ee..7ae9be5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1220,9 +1220,6 @@
 
   static void ThreadExitCallback(void* arg);
 
-  // Maximum number of checkpoint functions.
-  static constexpr uint32_t kMaxCheckpoints = 3;
-
   // Maximum number of suspend barriers.
   static constexpr uint32_t kMaxSuspendBarriers = 3;
 
@@ -1452,9 +1449,9 @@
     // If no_thread_suspension_ is > 0, what is causing that assertion.
     const char* last_no_thread_suspension_cause;
 
-    // Pending checkpoint function or null if non-pending. Installation guarding by
-    // Locks::thread_suspend_count_lock_.
-    Closure* checkpoint_functions[kMaxCheckpoints];
+    // Pending checkpoint function or null if non-pending. If this checkpoint is set and someone
+    // requests another checkpoint, it goes to the checkpoint overflow list.
+    Closure* checkpoint_function GUARDED_BY(Locks::thread_suspend_count_lock_);
 
     // Pending barriers that require passing or NULL if non-pending. Installation guarding by
     // Locks::thread_suspend_count_lock_.
@@ -1517,6 +1514,9 @@
   // Debug disable read barrier count, only is checked for debug builds and only in the runtime.
   uint8_t debug_disallow_read_barrier_ = 0;
 
+  // Pending extra checkpoints if checkpoint_function is already used.
+  std::list<Closure*> checkpoint_overflow_ GUARDED_BY(Locks::thread_suspend_count_lock_);
+
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
   friend class Runtime;  // For CreatePeer.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 80b99fc..d90bd8d 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1300,6 +1300,39 @@
   }
 }
 
+void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
+  Thread* const self = Thread::Current();
+  std::vector<Thread*> threads_to_visit;
+
+  // Tell threads to suspend and copy them into list.
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : list_) {
+      thread->ModifySuspendCount(self, +1, nullptr, false);
+      if (thread == self || thread->IsSuspended()) {
+        threads_to_visit.push_back(thread);
+      } else {
+        thread->ModifySuspendCount(self, -1, nullptr, false);
+      }
+    }
+  }
+
+  // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
+  // order violations.
+  for (Thread* thread : threads_to_visit) {
+    thread->VisitRoots(visitor);
+  }
+
+  // Restore suspend counts.
+  {
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : threads_to_visit) {
+      thread->ModifySuspendCount(self, -1, nullptr, false);
+    }
+  }
+}
+
 void ThreadList::VisitRoots(RootVisitor* visitor) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index df81ad1..49f65e1 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -144,6 +144,10 @@
   void VisitRoots(RootVisitor* visitor) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void VisitRootsForSuspendedThreads(RootVisitor* visitor)
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Return a copy of the thread list.
   std::list<Thread*> GetList() REQUIRES(Locks::thread_list_lock_) {
     return list_;
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
index c9110a9..b729301 100644
--- a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -27,8 +27,20 @@
 namespace {
 
 static volatile std::atomic<bool> vm_was_shutdown(false);
+static const int kThreadCount = 4;
+
+static std::atomic<int> barrier_count(kThreadCount + 1);
+
+static void JniThreadBarrierWait() {
+  barrier_count--;
+  while (barrier_count.load() != 0) {
+    usleep(1000);
+  }
+}
 
 extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
   // Wait until the runtime is shutdown.
   while (!vm_was_shutdown.load()) {
     usleep(1000);
@@ -40,6 +52,8 @@
 
 // NO_RETURN does not work with extern "C" for target builds.
 extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
   // Fake up the managed stack so we can detach.
   Thread* const self = Thread::Current();
   self->SetTopOfStack(nullptr);
diff --git a/test/149-suspend-all-stress/check b/test/149-suspend-all-stress/check
new file mode 100755
index 0000000..d30b888
--- /dev/null
+++ b/test/149-suspend-all-stress/check
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Only compare the last line.
+tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/149-suspend-all-stress/expected.txt b/test/149-suspend-all-stress/expected.txt
index f993efc..134d8d0 100644
--- a/test/149-suspend-all-stress/expected.txt
+++ b/test/149-suspend-all-stress/expected.txt
@@ -1,2 +1 @@
-JNI_OnLoad called
 Finishing
diff --git a/test/149-suspend-all-stress/src/Main.java b/test/149-suspend-all-stress/src/Main.java
index aa94fc9..6a27c4b 100644
--- a/test/149-suspend-all-stress/src/Main.java
+++ b/test/149-suspend-all-stress/src/Main.java
@@ -17,7 +17,7 @@
 import java.util.Map;
 
 public class Main implements Runnable {
-    static final int numberOfThreads = 4;
+    static final int numberOfThreads = 8;
 
     public static void main(String[] args) throws Exception {
         System.loadLibrary(args[0]);
diff --git a/test/149-suspend-all-stress/suspend_all.cc b/test/149-suspend-all-stress/suspend_all.cc
index de479a5..dfd944a 100644
--- a/test/149-suspend-all-stress/suspend_all.cc
+++ b/test/149-suspend-all-stress/suspend_all.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "base/time_utils.h"
 #include "jni.h"
 #include "runtime.h"
 #include "thread_list.h"
@@ -22,7 +23,6 @@
 
 extern "C" JNIEXPORT void JNICALL Java_Main_suspendAndResume(JNIEnv*, jclass) {
   static constexpr size_t kInitialSleepUS = 100 * 1000;  // 100ms.
-  static constexpr size_t kIterations = 500;
   usleep(kInitialSleepUS);  // Leave some time for threads to get in here before we start suspending.
   enum Operation {
     kOPSuspendAll,
@@ -31,8 +31,11 @@
     // Total number of operations.
     kOPNumber,
   };
-  for (size_t i = 0; i < kIterations; ++i) {
-    switch (static_cast<Operation>(i % kOPNumber)) {
+  const uint64_t start_time = NanoTime();
+  size_t iterations = 0;
+  // Run for a fixed period of 10 seconds.
+  while (NanoTime() - start_time < MsToNs(10 * 1000)) {
+    switch (static_cast<Operation>(iterations % kOPNumber)) {
       case kOPSuspendAll: {
         ScopedSuspendAll ssa(__FUNCTION__);
         usleep(500);
@@ -52,7 +55,9 @@
       case kOPNumber:
         break;
     }
+    ++iterations;
   }
+  LOG(INFO) << "Did " << iterations << " iterations";
 }
 
 }  // namespace art
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index feee7c2..8598474 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -87,7 +87,11 @@
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libarttest.mk
   ifeq ($$(art_target_or_host),target)
     $(call set-target-local-clang-vars)
-    $(call set-target-local-cflags-vars,debug)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_MULTILIB := both
     LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
diff --git a/test/valgrind-target-suppressions.txt b/test/valgrind-target-suppressions.txt
index 896850c..7ae6d53 100644
--- a/test/valgrind-target-suppressions.txt
+++ b/test/valgrind-target-suppressions.txt
@@ -40,3 +40,13 @@
    fun:je_tsd_fetch
    fun:je_malloc_tsd_boot0
 }
+
+# Setenv is known-leaking when overwriting mappings. This is triggered by re-initializing
+# ANDROID_DATA. Ignore all setenv leaks.
+{
+   SetenvAndroidDataReinit
+   Memcheck:Leak
+   match-leak-kinds: definite
+   fun:malloc
+   fun:setenv
+}
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 304c2a9..d88a4a0 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -51,7 +51,7 @@
   make_command+=" ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
   make_command="make $j_arg $showcommands build-art-target-tests $common_targets"
-  make_command+=" libjavacrypto libjavacoretests linker toybox toolbox sh"
+  make_command+=" libjavacrypto libjavacoretests libnetd_client linker toybox toolbox sh"
   make_command+=" ${out_dir}/host/linux-x86/bin/adb libstdc++ "
   make_command+=" ${out_dir}/target/product/${TARGET_PRODUCT}/system/etc/public.libraries.txt"
 fi