Merge "Create a scoped arena allocator and use that for LVN."
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 0522456..f58aabc 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -173,22 +173,24 @@
 endif
 ART_TARGET_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
 
-# Enable thread-safety for GCC 4.6 on the target but not for GCC 4.7 where this feature was removed.
+# Enable thread-safety checks for GCC 4.6 and clang, but not for GCC 4.7 or later, where this
+# feature was removed. Warn when -Wthread-safety is not used.
 ifneq ($(filter 4.6 4.6.%, $(TARGET_GCC_VERSION)),)
   ART_TARGET_CFLAGS += -Wthread-safety
 else
-  # Warn if not using GCC 4.6 for target builds when not doing a top-level or 'mma' build.
-  ifneq ($(ONE_SHOT_MAKEFILE),)
-    # Enable target GCC 4.6 with: export TARGET_GCC_VERSION_EXP=4.6
-    $(info Using target GCC $(TARGET_GCC_VERSION) disables thread-safety checks.)
+  ifeq ($(ART_TARGET_CLANG),true)
+    ART_TARGET_CFLAGS += -Wthread-safety
+  else
+    # Warn if -Wthread-safety is not supported and we are not doing a top-level or 'mma' build.
+    ifneq ($(ONE_SHOT_MAKEFILE),)
+      # Enable target GCC 4.6 with: export TARGET_GCC_VERSION_EXP=4.6
+      $(info Using target GCC $(TARGET_GCC_VERSION) disables thread-safety checks.)
+    endif
   endif
 endif
-# We build with GCC 4.6 on the host.
+# We compile with GCC 4.6 or clang on the host, both of which support -Wthread-safety.
 ART_HOST_CFLAGS += -Wthread-safety
 
-# Make host builds easier to debug and profile by not omitting the frame pointer.
-ART_HOST_CFLAGS += -fno-omit-frame-pointer
-
 # To use oprofile_android --callgraph, uncomment this and recompile with "mmm art -B -j16"
 # ART_TARGET_CFLAGS += -fno-omit-frame-pointer -marm -mapcs
 
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 691cee0..fdc8540 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -261,12 +261,6 @@
 
 endef
 
-ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-  $(eval $(call build-libart-compiler,target,ndebug))
-endif
-ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-libart-compiler,target,debug))
-endif
 ifeq ($(WITH_HOST_DALVIK),true)
   # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
   ifeq ($(ART_BUILD_NDEBUG),true)
@@ -276,6 +270,12 @@
     $(eval $(call build-libart-compiler,host,debug))
   endif
 endif
+ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
+  $(eval $(call build-libart-compiler,target,ndebug))
+endif
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+  $(eval $(call build-libart-compiler,target,debug))
+endif
 
 # Rule to build /system/lib/libcompiler_rt.a
 # Usually static libraries are not installed on the device.
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index bca72b8..def7b68 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -35,9 +35,9 @@
 // A signal handler called when we encounter an illegal instruction.  We record the fact in
 // a global boolean and then increment the PC in the signal context to return to
 // the next instruction.  We know the instruction is an sdiv (4 bytes long).
-static void baddivideinst(int signo, siginfo *si, void *data) {
-  (void)signo;
-  (void)si;
+static inline void baddivideinst(int signo, siginfo *si, void *data) {
+  UNUSED(signo);
+  UNUSED(si);
   struct ucontext *uc = (struct ucontext *)data;
   struct sigcontext *sc = &uc->uc_mcontext;
   sc->arm_r0 = 0;     // set R0 to #0 to signal error
@@ -56,7 +56,7 @@
 
 extern "C" bool CheckForARMSDIVInstruction();
 
-static InstructionSetFeatures GuessInstructionFeatures() {
+static inline InstructionSetFeatures GuessInstructionFeatures() {
   InstructionSetFeatures f;
 
   // Uncomment this for processing of /proc/cpuinfo.
@@ -107,7 +107,7 @@
 // Given a set of instruction features from the build, parse it.  The
 // input 'str' is a comma separated list of feature names.  Parse it and
 // return the InstructionSetFeatures object.
-static InstructionSetFeatures ParseFeatureList(std::string str) {
+static inline InstructionSetFeatures ParseFeatureList(std::string str) {
   InstructionSetFeatures result;
   typedef std::vector<std::string> FeatureList;
   FeatureList features;
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index cb424d9..7423393 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -18,7 +18,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/mutex-inl.h"
-#include "locks.h"
 #include "thread.h"
 #include "thread-inl.h"
 #include "dex/mir_graph.h"
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index b4d8dd6..4aff01c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -23,7 +23,6 @@
 #include "safe_map.h"
 #include "dex/compiler_enums.h"
 #include "dex_file.h"
-#include "locks.h"
 
 namespace art {
 
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index aa0e72a..257e70c 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -19,6 +19,7 @@
 
 #include <vector>
 
+#include "base/mutex.h"
 #include "method_reference.h"
 #include "safe_map.h"
 
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 3610d1a..03b965a 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "base/macros.h"
+#include "base/mutex.h"
 #include "os.h"
 
 namespace art {
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 21245db..cb5aa27 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -20,7 +20,6 @@
 #include <stdint.h>
 #include <vector>
 
-#include "locks.h"
 #include "driver/compiler_driver.h"
 
 namespace art {
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 828dffa..dbd078a 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -1550,6 +1550,9 @@
   CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
   LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
                  base.AsArm().AsCoreRegister(), offs.Int32Value());
+  if (kPoisonHeapReferences) {
+    rsb(dst.AsCoreRegister(), dst.AsCoreRegister(), ShifterOperand(0));
+  }
 }
 
 void ArmAssembler::LoadRef(ManagedRegister mdest, FrameOffset  src) {
diff --git a/compiler/utils/arm64/managed_register_arm64.cc b/compiler/utils/arm64/managed_register_arm64.cc
index cc0b509..de5cb8c 100644
--- a/compiler/utils/arm64/managed_register_arm64.cc
+++ b/compiler/utils/arm64/managed_register_arm64.cc
@@ -27,10 +27,10 @@
 //  * [W0, W15]
 //  * [D0, D31]
 //  * [S0, S31]
-static const int kNumberOfAvailableCoreRegisters = (X15 - X0) + 1;
-static const int kNumberOfAvailableWRegisters = (W15 - W0) + 1;
-static const int kNumberOfAvailableDRegisters = kNumberOfDRegisters;
-static const int kNumberOfAvailableSRegisters = kNumberOfSRegisters;
+// static const int kNumberOfAvailableCoreRegisters = (X15 - X0) + 1;
+// static const int kNumberOfAvailableWRegisters = (W15 - W0) + 1;
+// static const int kNumberOfAvailableDRegisters = kNumberOfDRegisters;
+// static const int kNumberOfAvailableSRegisters = kNumberOfSRegisters;
 
 // Returns true if this managed-register overlaps the other managed-register.
 // GP Register Bank:
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index fdd2bab..ce21b84 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -684,6 +684,9 @@
   CHECK(dest.IsCoreRegister() && dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
+  if (kPoisonHeapReferences) {
+    Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister());
+  }
 }
 
 void MipsAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 26300e0..db8956d 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1560,6 +1560,9 @@
   X86ManagedRegister dest = mdest.AsX86();
   CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
   movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
+  if (kPoisonHeapReferences) {
+    negl(dest.AsCpuRegister());
+  }
 }
 
 void X86Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
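
Note on the kPoisonHeapReferences changes above: the rsb (ARM), Subu (MIPS) and negl (x86) emitters all negate a freshly loaded reference, because with poisoning enabled references are kept negated while in memory. A minimal C++ sketch of the idea (the function names are illustrative only; the matching store-side poisoning is not part of these hunks):

    #include <cassert>
    #include <cstdint>

    // With poisoning enabled, a 32-bit reference is stored negated and must be
    // negated again on load to recover the real value.
    uint32_t PoisonReference(uint32_t ref) { return 0u - ref; }    // Store side (elsewhere).
    uint32_t UnpoisonReference(uint32_t ref) { return 0u - ref; }  // What rsb/Subu/negl compute.

    int main() {
      uint32_t ref = 0x12345678u;
      assert(UnpoisonReference(PoisonReference(ref)) == ref);  // Negation round-trips.
      return 0;
    }
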
diff --git a/runtime/Android.mk b/runtime/Android.mk
index bb1bc99..18e2d3e 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -84,7 +84,6 @@
 	jdwp/object_registry.cc \
 	jni_internal.cc \
 	jobject_comparator.cc \
-	locks.cc \
 	mem_map.cc \
 	memory_region.cc \
 	mirror/art_field.cc \
@@ -289,7 +288,6 @@
 	invoke_type.h \
 	jdwp/jdwp.h \
 	jdwp/jdwp_constants.h \
-	locks.h \
 	lock_word.h \
 	mirror/class.h \
 	oat.h \
@@ -425,14 +423,8 @@
   endif
 endef
 
-ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-  $(eval $(call build-libart,target,ndebug,$(ART_TARGET_CLANG)))
-endif
-ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-libart,target,debug,$(ART_TARGET_CLANG)))
-endif
-
-# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
+# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since
+# they are used to cross compile for the target.
 ifeq ($(WITH_HOST_DALVIK),true)
   ifeq ($(ART_BUILD_NDEBUG),true)
     $(eval $(call build-libart,host,ndebug,$(ART_HOST_CLANG)))
@@ -441,3 +433,10 @@
     $(eval $(call build-libart,host,debug,$(ART_HOST_CLANG)))
   endif
 endif
+
+ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
+  $(eval $(call build-libart,target,ndebug,$(ART_TARGET_CLANG)))
+endif
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+  $(eval $(call build-libart,target,debug,$(ART_TARGET_CLANG)))
+endif
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 4a0d082..2ccce8d 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -17,7 +17,6 @@
 #ifndef ART_RUNTIME_ARCH_ARM_CONTEXT_ARM_H_
 #define ART_RUNTIME_ARCH_ARM_CONTEXT_ARM_H_
 
-#include "locks.h"
 #include "arch/context.h"
 #include "base/logging.h"
 #include "registers_arm.h"
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 83bbb11..f7b7835 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -20,7 +20,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
-#include "locks.h"
+#include "base/mutex.h"
 
 namespace art {
 
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index b59c0cb..14975da 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -19,38 +19,25 @@
 
 #include "asm_support_x86_64.h"
 
-#if defined(__APPLE__)
-    // Mac OS' as(1) doesn't let you name macro parameters.
+#if defined(__clang__)
+    // Clang's as(1) doesn't let you name macro parameters.
     #define MACRO0(macro_name) .macro macro_name
     #define MACRO1(macro_name, macro_arg1) .macro macro_name
     #define MACRO2(macro_name, macro_arg1, macro_args2) .macro macro_name
     #define MACRO3(macro_name, macro_arg1, macro_args2, macro_args3) .macro macro_name
     #define END_MACRO .endmacro
 
-    // Mac OS' as(1) uses $0, $1, and so on for macro arguments, and function names
-    // are mangled with an extra underscore prefix. The use of $x for arguments
-    // mean that literals need to be represented with $$x in macros.
-    #define SYMBOL(name) _ ## name
-    #define PLT_SYMBOL(name) _ ## name
+    // Clang's as(1) uses $0, $1, and so on for macro arguments.
     #define VAR(name,index) SYMBOL($index)
     #define PLT_VAR(name, index) SYMBOL($index)
     #define REG_VAR(name,index) %$index
     #define CALL_MACRO(name,index) $index
+    #define FUNCTION_TYPE(name,index) .type $index, @function
+    #define SIZE(name,index) .size $index, .-$index
+
+    //  The use of $x for arguments means that literals need to be represented with $$x in macros.
     #define LITERAL(value) $value
     #define MACRO_LITERAL(value) $$value
-
-    // Mac OS' doesn't like cfi_* directives
-    #define CFI_STARTPROC
-    #define CFI_ENDPROC
-    #define CFI_ADJUST_CFA_OFFSET(size)
-    #define CFI_DEF_CFA(reg,size)
-    #define CFI_DEF_CFA_REGISTER(reg)
-    #define CFI_RESTORE(reg)
-    #define CFI_REL_OFFSET(reg,size)
-
-    // Mac OS' doesn't support certain directives
-    #define FUNCTION_TYPE(name)
-    #define SIZE(name)
 #else
     // Regular gas(1) lets you name macro parameters.
     #define MACRO0(macro_name) .macro macro_name
@@ -65,16 +52,19 @@
     // no special meaning to $, so literals are still just $x. The use of altmacro means % is a
     // special character meaning care needs to be taken when passing registers as macro arguments.
     .altmacro
-    #define SYMBOL(name) name
-    #define PLT_SYMBOL(name) name@PLT
     #define VAR(name,index) name&
     #define PLT_VAR(name, index) name&@PLT
     #define REG_VAR(name,index) %name
     #define CALL_MACRO(name,index) name&
+    #define FUNCTION_TYPE(name,index) .type name&, @function
+    #define SIZE(name,index) .size name, .-name
+
     #define LITERAL(value) $value
     #define MACRO_LITERAL(value) $value
+#endif
 
-    // CFI support
+    // CFI support.
+#if !defined(__APPLE__)
     #define CFI_STARTPROC .cfi_startproc
     #define CFI_ENDPROC .cfi_endproc
     #define CFI_ADJUST_CFA_OFFSET(size) .cfi_adjust_cfa_offset size
@@ -82,9 +72,25 @@
     #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
     #define CFI_RESTORE(reg) .cfi_restore reg
     #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+#else
+    // Mac OS' as(1) doesn't like cfi_* directives.
+    #define CFI_STARTPROC
+    #define CFI_ENDPROC
+    #define CFI_ADJUST_CFA_OFFSET(size)
+    #define CFI_DEF_CFA(reg,size)
+    #define CFI_DEF_CFA_REGISTER(reg)
+    #define CFI_RESTORE(reg)
+    #define CFI_REL_OFFSET(reg,size)
+#endif
 
-    #define FUNCTION_TYPE(name) .type name&, @function
-    #define SIZE(name) .size name, .-name
+    // Symbols.
+#if !defined(__APPLE__)
+    #define SYMBOL(name) name
+    #define PLT_SYMBOL(name) name ## @PLT
+#else
+    // Mac OS' symbols have an _ prefix.
+    #define SYMBOL(name) _ ## name
+    #define PLT_SYMBOL(name) _ ## name
 #endif
 
     /* Cache alignment for function entry */
@@ -93,7 +99,7 @@
 END_MACRO
 
 MACRO1(DEFINE_FUNCTION, c_name)
-    FUNCTION_TYPE(\c_name)
+    FUNCTION_TYPE(\c_name, 0)
     .globl VAR(c_name, 0)
     ALIGN_FUNCTION_ENTRY
 VAR(c_name, 0):
@@ -102,7 +108,7 @@
 
 MACRO1(END_FUNCTION, c_name)
     CFI_ENDPROC
-    SIZE(\c_name)
+    SIZE(\c_name, 0)
 END_MACRO
 
 MACRO1(PUSH, reg)
@@ -118,7 +124,7 @@
 END_MACRO
 
 MACRO1(UNIMPLEMENTED,name)
-    FUNCTION_TYPE(\name)
+    FUNCTION_TYPE(\name, 0)
     .globl VAR(name, 0)
     ALIGN_FUNCTION_ENTRY
 VAR(name, 0):
@@ -126,21 +132,7 @@
     int3
     int3
     CFI_ENDPROC
-    SIZE(\name)
-END_MACRO
-
-MACRO0(SETUP_GOT_NOSAVE)
-    call __x86.get_pc_thunk.bx
-    addl $_GLOBAL_OFFSET_TABLE_, %ebx
-END_MACRO
-
-MACRO0(SETUP_GOT)
-    PUSH  ebx
-    SETUP_GOT_NOSAVE
-END_MACRO
-
-MACRO0(UNDO_SETUP_GOT)
-    POP  ebx
+    SIZE(\name, 0)
 END_MACRO
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 863fa31..da09861 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -33,7 +33,7 @@
     PUSH r12  // Callee save.
     PUSH rbp  // Callee save.
     PUSH rbx  // Callee save.
-    subq LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
+    subq MACRO_LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
     CFI_ADJUST_CFA_OFFSET(8)
     // R10 := ArtMethod* for ref and args callee save frame method.
     movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
@@ -76,7 +76,7 @@
     PUSH rdx  // Quick arg 2.
     PUSH rcx  // Quick arg 3.
     // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq LITERAL(80), %rsp
+    subq MACRO_LITERAL(80), %rsp
     CFI_ADJUST_CFA_OFFSET(80)
     // R10 := ArtMethod* for ref and args callee save frame method.
     movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
@@ -103,7 +103,7 @@
     movq 56(%rsp), %xmm5
     movq 64(%rsp), %xmm6
     movq 72(%rsp), %xmm7
-    addq LITERAL(80), %rsp
+    addq MACRO_LITERAL(80), %rsp
     CFI_ADJUST_CFA_OFFSET(-80)
     // Restore callee and GPR args, mixed together to agree with core spills bitmap.
     POP rcx
@@ -226,26 +226,26 @@
 MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
 1: // LOOP
     movb (%r10), %al              // al := *shorty
-    addq LITERAL(1), %r10         // shorty++
-    cmpb LITERAL(0), %al          // if (al == '\0') goto xmm_setup_finished
+    addq MACRO_LITERAL(1), %r10   // shorty++
+    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
     je VAR(finished, 1)
-    cmpb LITERAL(68), %al         // if (al == 'D') goto FOUND_DOUBLE
+    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
     je 2f
-    cmpb LITERAL(70), %al         // if (al == 'F') goto FOUND_FLOAT
+    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
     je 3f
-    addq LITERAL(4), %r11         // arg_array++
+    addq MACRO_LITERAL(4), %r11   // arg_array++
     //  Handle extra space in arg array taken by a long.
-    cmpb LITERAL(74), %al         // if (al != 'J') goto LOOP
+    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
     jne 1b
-    addq LITERAL(4), %r11         // arg_array++
+    addq MACRO_LITERAL(4), %r11   // arg_array++
     jmp 1b                        // goto LOOP
 2:  // FOUND_DOUBLE
     movsd (%r11), REG_VAR(xmm_reg, 0)
-    addq LITERAL(8), %r11         // arg_array+=2
+    addq MACRO_LITERAL(8), %r11   // arg_array+=2
     jmp 4f
 3:  // FOUND_FLOAT
     movss (%r11), REG_VAR(xmm_reg, 0)
-    addq LITERAL(4), %r11         // arg_array++
+    addq MACRO_LITERAL(4), %r11   // arg_array++
 4:
 END_MACRO
 
@@ -257,27 +257,27 @@
 MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
 1: // LOOP
     movb (%r10), %al              // al := *shorty
-    addq LITERAL(1), %r10         // shorty++
-    cmpb LITERAL(0), %al          // if (al == '\0') goto gpr_setup_finished
+    addq MACRO_LITERAL(1), %r10   // shorty++
+    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
     je  VAR(finished, 2)
-    cmpb LITERAL(74), %al         // if (al == 'J') goto FOUND_LONG
+    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
     je 2f
-    cmpb LITERAL(70), %al         // if (al == 'F') goto SKIP_FLOAT
+    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
     je 3f
-    cmpb LITERAL(68), %al         // if (al == 'D') goto SKIP_DOUBLE
+    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
     je 4f
     movl (%r11), REG_VAR(gpr_reg32, 1)
-    addq LITERAL(4), %r11         // arg_array++
+    addq MACRO_LITERAL(4), %r11   // arg_array++
     jmp 5f
 2:  // FOUND_LONG
     movq (%r11), REG_VAR(gpr_reg64, 0)
-    addq LITERAL(8), %r11         // arg_array+=2
+    addq MACRO_LITERAL(8), %r11   // arg_array+=2
     jmp 5f
 3:  // SKIP_FLOAT
-    addq LITERAL(4), %r11         // arg_array++
+    addq MACRO_LITERAL(4), %r11   // arg_array++
     jmp 1b
 4:  // SKIP_DOUBLE
-    addq LITERAL(8), %r11         // arg_array+=2
+    addq MACRO_LITERAL(8), %r11   // arg_array+=2
     jmp 1b
 5:
 END_MACRO
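
Note on the MACRO_LITERAL rewrites above: inside a macro assembled by clang's as(1), a bare $x is read as a macro argument, so literals need the $$ spelling; the compared values are just ASCII codes for shorty characters (68 == 'D', 70 == 'F', 74 == 'J'). A C++ sketch of the same shorty walk (CountArgArraySlots is illustrative, not an ART function), relying on longs/doubles taking two 4-byte argument slots and on the first shorty character being the return type:

    #include <cstddef>

    size_t CountArgArraySlots(const char* shorty) {
      size_t slots = 0;
      for (const char* p = shorty + 1; *p != '\0'; ++p) {  // shorty[0] is the return type.
        slots += (*p == 'J' || *p == 'D') ? 2 : 1;         // 'J' (74) and 'D' (68) take two slots.
      }
      return slots;  // e.g. CountArgArraySlots("VIJF") == 1 + 2 + 1 == 4.
    }
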
diff --git a/runtime/barrier.h b/runtime/barrier.h
index e335c32..0c7fd87 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -18,7 +18,6 @@
 #define ART_RUNTIME_BARRIER_H_
 
 #include "base/mutex.h"
-#include "locks.h"
 #include "UniquePtr.h"
 
 namespace art {
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 075d571..0fcec1f 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -192,7 +192,7 @@
     : data_(new LogMessageData(file, line, severity, error)) {
   }
 
-  ~LogMessage() LOCKS_EXCLUDED(Locks::logging_lock_);
+  ~LogMessage();  // TODO: enable LOCKS_EXCLUDED(Locks::logging_lock_).
 
   std::ostream& stream() {
     return data_->buffer;
@@ -235,32 +235,6 @@
   return os;
 }
 
-template<typename T>
-class MutatorLockedDumpable {
- public:
-  explicit MutatorLockedDumpable(T& value)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : value_(value) {
-  }
-
-  void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    value_.Dump(os);
-  }
-
- private:
-  T& value_;
-
-  DISALLOW_COPY_AND_ASSIGN(MutatorLockedDumpable);
-};
-
-template<typename T>
-std::ostream& operator<<(std::ostream& os, const MutatorLockedDumpable<T>& rhs)
-// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) however annotalysis
-//       currently fails for this.
-    NO_THREAD_SAFETY_ANALYSIS {
-  rhs.Dump(os);
-  return os;
-}
-
 // Helps you use operator<< in a const char*-like context such as our various 'F' methods with
 // format strings.
 template<typename T>
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index 6cc9396..b193ff1 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -178,48 +178,40 @@
 
 template<typename T> void UNUSED(const T&) {}
 
-#if defined(__SUPPORT_TS_ANNOTATION__)
-
-#define ACQUIRED_AFTER(...) __attribute__ ((acquired_after(__VA_ARGS__)))
-#define ACQUIRED_BEFORE(...) __attribute__ ((acquired_before(__VA_ARGS__)))
-#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__ ((exclusive_lock(__VA_ARGS__)))
-#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__ ((exclusive_locks_required(__VA_ARGS__)))
-#define EXCLUSIVE_TRYLOCK_FUNCTION(...) __attribute__ ((exclusive_trylock(__VA_ARGS__)))
-#define GUARDED_BY(x) __attribute__ ((guarded_by(x)))
-#define GUARDED_VAR __attribute__ ((guarded))
-#define LOCKABLE __attribute__ ((lockable))
-#define LOCK_RETURNED(x) __attribute__ ((lock_returned(x)))
-#define LOCKS_EXCLUDED(...) __attribute__ ((locks_excluded(__VA_ARGS__)))
-#define NO_THREAD_SAFETY_ANALYSIS __attribute__ ((no_thread_safety_analysis))
-#define PT_GUARDED_BY(x) __attribute__ ((point_to_guarded_by(x)))
-#define PT_GUARDED_VAR __attribute__ ((point_to_guarded))
-#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable))
-#define SHARED_LOCK_FUNCTION(...) __attribute__ ((shared_lock(__VA_ARGS__)))
-#define SHARED_LOCKS_REQUIRED(...) __attribute__ ((shared_locks_required(__VA_ARGS__)))
-#define SHARED_TRYLOCK_FUNCTION(...) __attribute__ ((shared_trylock(__VA_ARGS__)))
-#define UNLOCK_FUNCTION(...) __attribute__ ((unlock(__VA_ARGS__)))
-
+// Annotalysis thread-safety analysis support.
+#if defined(__SUPPORT_TS_ANNOTATION__) || defined(__clang__)
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
 #else
+#define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
+#endif
 
-#define ACQUIRED_AFTER(...)
-#define ACQUIRED_BEFORE(...)
-#define EXCLUSIVE_LOCK_FUNCTION(...)
-#define EXCLUSIVE_LOCKS_REQUIRED(...)
-#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
-#define GUARDED_BY(x)
-#define GUARDED_VAR
-#define LOCKABLE
-#define LOCK_RETURNED(x)
-#define LOCKS_EXCLUDED(...)
-#define NO_THREAD_SAFETY_ANALYSIS
+#define ACQUIRED_AFTER(...) THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
+#define ACQUIRED_BEFORE(...) THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
+#define EXCLUSIVE_LOCKS_REQUIRED(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(__VA_ARGS__))
+#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded)
+#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable)
+#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+#define LOCKS_EXCLUDED(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
+#define NO_THREAD_SAFETY_ANALYSIS THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
 #define PT_GUARDED_BY(x)
-#define PT_GUARDED_VAR
-#define SCOPED_LOCKABLE
-#define SHARED_LOCK_FUNCTION(...)
-#define SHARED_LOCKS_REQUIRED(...)
-#define SHARED_TRYLOCK_FUNCTION(...)
-#define UNLOCK_FUNCTION(...)
+// THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x))
+#define PT_GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded)
+#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+#define SHARED_LOCKS_REQUIRED(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(__VA_ARGS__))
 
-#endif  // defined(__SUPPORT_TS_ANNOTATION__)
+#if defined(__clang__)
+#define EXCLUSIVE_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__))
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock_function(__VA_ARGS__))
+#define SHARED_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__))
+#define SHARED_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock_function(__VA_ARGS__))
+#define UNLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__))
+#else
+#define EXCLUSIVE_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(__VA_ARGS__))
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(__VA_ARGS__))
+#define SHARED_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(__VA_ARGS__))
+#define SHARED_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(__VA_ARGS__))
+#define UNLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(unlock(__VA_ARGS__))
+#endif
 
 #endif  // ART_RUNTIME_BASE_MACROS_H_
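
A minimal sketch of how the consolidated macros above are consumed by annotalysis / Clang's -Wthread-safety (the Counter class is hypothetical; Mutex, MutexLock and DEFAULT_MUTEX_ACQUIRED_AFTER are used as elsewhere in this change):

    #include "base/macros.h"
    #include "base/mutex.h"

    namespace art {

    class Counter {
     public:
      Counter() : lock_("Counter lock"), value_(0) {}

      void Increment(Thread* self) LOCKS_EXCLUDED(lock_) {
        MutexLock mu(self, lock_);  // Satisfies the GUARDED_BY requirement below.
        ++value_;
      }

      size_t Value() EXCLUSIVE_LOCKS_REQUIRED(lock_) {  // Caller must already hold lock_.
        return value_;
      }

     private:
      Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
      size_t value_ GUARDED_BY(lock_);
    };

    }  // namespace art
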
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index ff72d16..fdf5763 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -29,6 +29,30 @@
 
 namespace art {
 
+Mutex* Locks::abort_lock_ = nullptr;
+Mutex* Locks::breakpoint_lock_ = nullptr;
+Mutex* Locks::deoptimization_lock_ = nullptr;
+ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr;
+ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
+Mutex* Locks::logging_lock_ = nullptr;
+ReaderWriterMutex* Locks::mutator_lock_ = nullptr;
+Mutex* Locks::runtime_shutdown_lock_ = nullptr;
+Mutex* Locks::thread_list_lock_ = nullptr;
+Mutex* Locks::thread_suspend_count_lock_ = nullptr;
+Mutex* Locks::trace_lock_ = nullptr;
+Mutex* Locks::profiler_lock_ = nullptr;
+Mutex* Locks::unexpected_signal_lock_ = nullptr;
+Mutex* Locks::intern_table_lock_ = nullptr;
+
+struct AllMutexData {
+  // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
+  Atomic<const BaseMutex*> all_mutexes_guard;
+  // All created mutexes guarded by all_mutexes_guard_.
+  std::set<BaseMutex*>* all_mutexes;
+  AllMutexData() : all_mutexes(NULL) {}
+};
+static struct AllMutexData gAllMutexData[kAllMutexDataSize];
+
 #if ART_USE_FUTEXES
 static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) {
   const int32_t one_sec = 1000 * 1000 * 1000;  // one second in nanoseconds.
@@ -45,15 +69,6 @@
 }
 #endif
 
-struct AllMutexData {
-  // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
-  Atomic<const BaseMutex*> all_mutexes_guard;
-  // All created mutexes guarded by all_mutexes_guard_.
-  std::set<BaseMutex*>* all_mutexes;
-  AllMutexData() : all_mutexes(NULL) {}
-};
-static struct AllMutexData gAllMutexData[kAllMutexDataSize];
-
 class ScopedAllMutexesLock {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
@@ -792,4 +807,53 @@
   guard_.recursion_count_ = old_recursion_count;
 }
 
+void Locks::Init() {
+  if (logging_lock_ != nullptr) {
+    // Already initialized.
+    DCHECK(abort_lock_ != nullptr);
+    DCHECK(breakpoint_lock_ != nullptr);
+    DCHECK(deoptimization_lock_ != nullptr);
+    DCHECK(classlinker_classes_lock_ != nullptr);
+    DCHECK(heap_bitmap_lock_ != nullptr);
+    DCHECK(logging_lock_ != nullptr);
+    DCHECK(mutator_lock_ != nullptr);
+    DCHECK(thread_list_lock_ != nullptr);
+    DCHECK(thread_suspend_count_lock_ != nullptr);
+    DCHECK(trace_lock_ != nullptr);
+    DCHECK(profiler_lock_ != nullptr);
+    DCHECK(unexpected_signal_lock_ != nullptr);
+    DCHECK(intern_table_lock_ != nullptr);
+  } else {
+    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
+    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
+
+    DCHECK(breakpoint_lock_ == nullptr);
+    breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock);
+    DCHECK(deoptimization_lock_ == nullptr);
+    deoptimization_lock_ = new Mutex("deoptimization lock", kDeoptimizationLock);
+    DCHECK(classlinker_classes_lock_ == nullptr);
+    classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
+                                                      kClassLinkerClassesLock);
+    DCHECK(heap_bitmap_lock_ == nullptr);
+    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
+    DCHECK(mutator_lock_ == nullptr);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
+    DCHECK(runtime_shutdown_lock_ == nullptr);
+    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock);
+    DCHECK(thread_list_lock_ == nullptr);
+    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
+    DCHECK(thread_suspend_count_lock_ == nullptr);
+    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
+    DCHECK(trace_lock_ == nullptr);
+    trace_lock_ = new Mutex("trace lock", kTraceLock);
+    DCHECK(profiler_lock_ == nullptr);
+    profiler_lock_ = new Mutex("profiler lock", kProfilerLock);
+    DCHECK(unexpected_signal_lock_ == nullptr);
+    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
+    DCHECK(intern_table_lock_ == nullptr);
+    intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock);
+  }
+}
+
+
 }  // namespace art
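
The AllMutexData guard moved above is deliberately not a Mutex: it is taken by compare-and-swap plus busy waiting, so it can protect the registry of all mutexes without depending on one. A standalone sketch of that pattern using std::atomic (ART's Atomic type and ScopedAllMutexesLock differ in their details):

    #include <atomic>

    class ScopedCasGuard {
     public:
      ScopedCasGuard(std::atomic<const void*>& guard, const void* owner) : guard_(guard) {
        const void* expected = nullptr;
        // Busy-wait until 'owner' is installed as the current holder.
        while (!guard_.compare_exchange_weak(expected, owner)) {
          expected = nullptr;  // compare_exchange_weak overwrites 'expected' on failure.
        }
      }
      ~ScopedCasGuard() { guard_.store(nullptr); }

     private:
      std::atomic<const void*>& guard_;
    };
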
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 63ed6cb..55ec1c3 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -27,7 +27,6 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
-#include "locks.h"
 
 #if defined(__APPLE__)
 #define ART_USE_FUTEXES 0
@@ -44,9 +43,56 @@
 
 namespace art {
 
+class LOCKABLE ReaderWriterMutex;
 class ScopedContentionRecorder;
 class Thread;
 
+// LockLevel is used to impose a lock hierarchy [1] where acquisition of a Mutex at a higher or
+// equal level to a lock a thread holds is invalid. The lock hierarchy achieves a cycle-free
+// partial ordering and thereby causes deadlock situations to fail checks.
+//
+// [1] http://www.drdobbs.com/parallel/use-lock-hierarchies-to-avoid-deadlock/204801163
+enum LockLevel {
+  kLoggingLock = 0,
+  kUnexpectedSignalLock,
+  kThreadSuspendCountLock,
+  kAbortLock,
+  kJdwpSocketLock,
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
+  kAllocSpaceLock,
+  kDexFileMethodInlinerLock,
+  kDexFileToMethodInlinerMapLock,
+  kMarkSweepMarkStackLock,
+  kTransactionLogLock,
+  kInternTableLock,
+  kMonitorPoolLock,
+  kDefaultMutexLevel,
+  kMarkSweepLargeObjectLock,
+  kPinTableLock,
+  kLoadLibraryLock,
+  kJdwpObjectRegistryLock,
+  kClassLinkerClassesLock,
+  kBreakpointLock,
+  kMonitorLock,
+  kThreadListLock,
+  kBreakpointInvokeLock,
+  kDeoptimizationLock,
+  kTraceLock,
+  kProfilerLock,
+  kJdwpEventListLock,
+  kJdwpAttachLock,
+  kJdwpStartLock,
+  kRuntimeShutdownLock,
+  kHeapBitmapLock,
+  kMutatorLock,
+  kZygoteCreationLock,
+
+  kLockLevelCount  // Must come last.
+};
+std::ostream& operator<<(std::ostream& os, const LockLevel& rhs);
+
 const bool kDebugLocking = kIsDebugBuild;
 
 // Record Log contention information, dumpable via SIGQUIT.
@@ -413,6 +459,117 @@
 // "WriterMutexLock mu(lock)".
 #define WriterMutexLock(x) COMPILE_ASSERT(0, writer_mutex_lock_declaration_missing_variable_name)
 
+// Global mutexes corresponding to the levels above.
+class Locks {
+ public:
+  static void Init();
+
+  // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block
+  // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds
+  // a share on the mutator_lock_. The garbage collector may also execute with shared access but
+  // at times requires exclusive access to the heap (not to be confused with the heap meta-data
+  // guarded by the heap_lock_ below). When the garbage collector requires exclusive access it asks
+  // the mutators to suspend themselves which also involves usage of the thread_suspend_count_lock_
+  // to cover weaknesses in using ReaderWriterMutexes with ConditionVariables. We use a condition
+  // variable to wait upon in the suspension logic as releasing and then re-acquiring a share on
+  // the mutator lock doesn't necessarily allow the exclusive user (e.g. the garbage collector) a
+  // chance to acquire the lock.
+  //
+  // Thread suspension:
+  // Shared users                                  | Exclusive user
+  // (holding mutator lock and in kRunnable state) |   .. running ..
+  //   .. running ..                               | Request thread suspension by:
+  //   .. running ..                               |   - acquiring thread_suspend_count_lock_
+  //   .. running ..                               |   - incrementing Thread::suspend_count_ on
+  //   .. running ..                               |     all mutator threads
+  //   .. running ..                               |   - releasing thread_suspend_count_lock_
+  //   .. running ..                               | Block trying to acquire exclusive mutator lock
+  // Poll Thread::suspend_count_ and enter full    |   .. blocked ..
+  // suspend code.                                 |   .. blocked ..
+  // Change state to kSuspended                    |   .. blocked ..
+  // x: Release share on mutator_lock_             | Carry out exclusive access
+  // Acquire thread_suspend_count_lock_            |   .. exclusive ..
+  // while Thread::suspend_count_ > 0              |   .. exclusive ..
+  //   - wait on Thread::resume_cond_              |   .. exclusive ..
+  //     (releases thread_suspend_count_lock_)     |   .. exclusive ..
+  //   .. waiting ..                               | Release mutator_lock_
+  //   .. waiting ..                               | Request thread resumption by:
+  //   .. waiting ..                               |   - acquiring thread_suspend_count_lock_
+  //   .. waiting ..                               |   - decrementing Thread::suspend_count_ on
+  //   .. waiting ..                               |     all mutator threads
+  //   .. waiting ..                               |   - notifying on Thread::resume_cond_
+  //    - re-acquire thread_suspend_count_lock_    |   - releasing thread_suspend_count_lock_
+  // Release thread_suspend_count_lock_            |  .. running ..
+  // Acquire share on mutator_lock_                |  .. running ..
+  //  - This could block but the thread still      |  .. running ..
+  //    has a state of kSuspended and so this      |  .. running ..
+  //    isn't an issue.                            |  .. running ..
+  // Acquire thread_suspend_count_lock_            |  .. running ..
+  //  - we poll here as we're transitioning into   |  .. running ..
+  //    kRunnable and an individual thread suspend |  .. running ..
+  //    request (e.g for debugging) won't try      |  .. running ..
+  //    to acquire the mutator lock (which would   |  .. running ..
+  //    block as we hold the mutator lock). This   |  .. running ..
+  //    poll ensures that if the suspender thought |  .. running ..
+  //    we were suspended by incrementing our      |  .. running ..
+  //    Thread::suspend_count_ and then reading    |  .. running ..
+  //    our state we go back to waiting on         |  .. running ..
+  //    Thread::resume_cond_.                      |  .. running ..
+  // can_go_runnable = Thread::suspend_count_ == 0 |  .. running ..
+  // Release thread_suspend_count_lock_            |  .. running ..
+  // if can_go_runnable                            |  .. running ..
+  //   Change state to kRunnable                   |  .. running ..
+  // else                                          |  .. running ..
+  //   Goto x                                      |  .. running ..
+  //  .. running ..                                |  .. running ..
+  static ReaderWriterMutex* mutator_lock_;
+
+  // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
+  static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
+
+  // Guards shutdown of the runtime.
+  static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
+
+  // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
+  // attaching and detaching.
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+
+  // Guards breakpoints.
+  static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_);
+
+  // Guards deoptimization requests.
+  static Mutex* deoptimization_lock_ ACQUIRED_AFTER(breakpoint_lock_);
+
+  // Guards trace requests.
+  static Mutex* trace_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+
+  // Guards profile objects.
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_);
+
+  // Guards lists of classes within the class linker.
+  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_);
+
+  // When declaring any Mutex, add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check that
+  // the code doesn't try to hold a higher-level Mutex.
+  #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_)
+
+  // Guards intern table.
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
+  // Have an exclusive aborting thread.
+  static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
+  // Allow mutual exclusion when manipulating Thread::suspend_count_.
+  // TODO: Does the trade-off of a per-thread lock make sense?
+  static Mutex* thread_suspend_count_lock_ ACQUIRED_AFTER(abort_lock_);
+
+  // One unexpected signal at a time lock.
+  static Mutex* unexpected_signal_lock_ ACQUIRED_AFTER(thread_suspend_count_lock_);
+
+  // Have an exclusive logging thread.
+  static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_MUTEX_H_
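
With the LockLevel enum and the ACQUIRED_AFTER annotations above, a thread that already holds a lock may only acquire locks at strictly lower levels; debug builds verify this at acquisition time. An illustrative sketch (the function names are hypothetical, and Locks::Init() is assumed to have run):

    #include "base/mutex.h"

    namespace art {

    void CorrectOrder(Thread* self) {
      MutexLock tl(self, *Locks::thread_list_lock_);            // kThreadListLock (higher level).
      MutexLock tsc(self, *Locks::thread_suspend_count_lock_);  // kThreadSuspendCountLock (lower): OK.
    }

    void ReversedOrder(Thread* self) {
      MutexLock tsc(self, *Locks::thread_suspend_count_lock_);  // Lower level held first...
      MutexLock tl(self, *Locks::thread_list_lock_);            // ...then a higher level: flagged by the checks.
    }

    }  // namespace art
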
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index aad7cfc..701e62e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -260,7 +260,7 @@
   bool GenerateOatFile(const char* dex_filename,
                        int oat_fd,
                        const char* oat_cache_filename,
-                       std::string* error_msg);
+                       std::string* error_msg)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   const OatFile* FindOatFileFromOatLocation(const std::string& location,
@@ -519,7 +519,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const OatFile* FindOpenedOatFileFromDexLocation(const char* dex_location,
                                                   const uint32_t* const dex_location_checksum)
-      LOCKS_EXCLUDED(dex_lock);
+      LOCKS_EXCLUDED(dex_lock_);
   const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location)
       LOCKS_EXCLUDED(dex_lock_);
   const DexFile* FindDexFileInOatLocation(const char* dex_location,
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index 7233d8e..b07043f 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_COMPILER_CALLBACKS_H_
 #define ART_RUNTIME_COMPILER_CALLBACKS_H_
 
+#include "base/mutex.h"
 #include "class_reference.h"
-#include "locks.h"
 
 namespace art {
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 3b4e9c7..7e2dfd2 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -184,14 +184,14 @@
 static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER;
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
-static ObjectRegistry* gRegistry = NULL;
+static ObjectRegistry* gRegistry = nullptr;
 
 // Recent allocation tracking.
-static Mutex gAllocTrackerLock DEFAULT_MUTEX_ACQUIRED_AFTER("AllocTracker lock");
-AllocRecord* Dbg::recent_allocation_records_ PT_GUARDED_BY(gAllocTrackerLock) = NULL;  // TODO: CircularBuffer<AllocRecord>
-static size_t gAllocRecordMax GUARDED_BY(gAllocTrackerLock) = 0;
-static size_t gAllocRecordHead GUARDED_BY(gAllocTrackerLock) = 0;
-static size_t gAllocRecordCount GUARDED_BY(gAllocTrackerLock) = 0;
+Mutex* Dbg::alloc_tracker_lock_ = nullptr;
+AllocRecord* Dbg::recent_allocation_records_ = nullptr;  // TODO: CircularBuffer<AllocRecord>
+size_t Dbg::alloc_record_max_ = 0;
+size_t Dbg::alloc_record_head_ = 0;
+size_t Dbg::alloc_record_count_ = 0;
 
 // Deoptimization support.
 struct MethodInstrumentationRequest {
@@ -468,9 +468,10 @@
     return;
   }
 
-  CHECK(gRegistry == NULL);
+  CHECK(gRegistry == nullptr);
   gRegistry = new ObjectRegistry;
 
+  alloc_tracker_lock_ = new Mutex("AllocTracker lock");
   // Init JDWP if the debugger is enabled. This may connect out to a
   // debugger, passively listen for a debugger, or block waiting for a
   // debugger.
@@ -496,9 +497,11 @@
   // Prevent the JDWP thread from processing JDWP incoming packets after we close the connection.
   Disposed();
   delete gJdwpState;
-  gJdwpState = NULL;
+  gJdwpState = nullptr;
   delete gRegistry;
-  gRegistry = NULL;
+  gRegistry = nullptr;
+  delete alloc_tracker_lock_;
+  alloc_tracker_lock_ = nullptr;
 }
 
 void Dbg::GcDidFinish() {
@@ -3695,15 +3698,15 @@
 }
 
 void Dbg::SetAllocTrackingEnabled(bool enabled) {
-  MutexLock mu(Thread::Current(), gAllocTrackerLock);
+  MutexLock mu(Thread::Current(), *alloc_tracker_lock_);
   if (enabled) {
     if (recent_allocation_records_ == NULL) {
-      gAllocRecordMax = GetAllocTrackerMax();
-      LOG(INFO) << "Enabling alloc tracker (" << gAllocRecordMax << " entries of "
+      alloc_record_max_ = GetAllocTrackerMax();
+      LOG(INFO) << "Enabling alloc tracker (" << alloc_record_max_ << " entries of "
                 << kMaxAllocRecordStackDepth << " frames, taking "
-                << PrettySize(sizeof(AllocRecord) * gAllocRecordMax) << ")";
-      gAllocRecordHead = gAllocRecordCount = 0;
-      recent_allocation_records_ = new AllocRecord[gAllocRecordMax];
+                << PrettySize(sizeof(AllocRecord) * alloc_record_max_) << ")";
+      alloc_record_head_ = alloc_record_count_ = 0;
+      recent_allocation_records_ = new AllocRecord[alloc_record_max_];
       CHECK(recent_allocation_records_ != NULL);
     }
     Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
@@ -3750,18 +3753,18 @@
   Thread* self = Thread::Current();
   CHECK(self != NULL);
 
-  MutexLock mu(self, gAllocTrackerLock);
+  MutexLock mu(self, *alloc_tracker_lock_);
   if (recent_allocation_records_ == NULL) {
     return;
   }
 
   // Advance and clip.
-  if (++gAllocRecordHead == gAllocRecordMax) {
-    gAllocRecordHead = 0;
+  if (++alloc_record_head_ == alloc_record_max_) {
+    alloc_record_head_ = 0;
   }
 
   // Fill in the basics.
-  AllocRecord* record = &recent_allocation_records_[gAllocRecordHead];
+  AllocRecord* record = &recent_allocation_records_[alloc_record_head_];
   record->type = type;
   record->byte_count = byte_count;
   record->thin_lock_id = self->GetThreadId();
@@ -3770,8 +3773,8 @@
   AllocRecordStackVisitor visitor(self, record);
   visitor.WalkStack();
 
-  if (gAllocRecordCount < gAllocRecordMax) {
-    ++gAllocRecordCount;
+  if (alloc_record_count_ < alloc_record_max_) {
+    ++alloc_record_count_;
   }
 }
 
@@ -3783,13 +3786,14 @@
 //
 // We need to handle underflow in our circular buffer, so we add
 // gAllocRecordMax and then mask it back down.
-static inline int HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(gAllocTrackerLock) {
-  return (gAllocRecordHead+1 + gAllocRecordMax - gAllocRecordCount) & (gAllocRecordMax-1);
+size_t Dbg::HeadIndex() {
+  return (Dbg::alloc_record_head_ + 1 + Dbg::alloc_record_max_ - Dbg::alloc_record_count_) &
+      (Dbg::alloc_record_max_ - 1);
 }
 
 void Dbg::DumpRecentAllocations() {
   ScopedObjectAccess soa(Thread::Current());
-  MutexLock mu(soa.Self(), gAllocTrackerLock);
+  MutexLock mu(soa.Self(), *alloc_tracker_lock_);
   if (recent_allocation_records_ == NULL) {
     LOG(INFO) << "Not recording tracked allocations";
     return;
@@ -3798,9 +3802,9 @@
   // "i" is the head of the list.  We want to start at the end of the
   // list and move forward to the tail.
   size_t i = HeadIndex();
-  size_t count = gAllocRecordCount;
+  size_t count = alloc_record_count_;
 
-  LOG(INFO) << "Tracked allocations, (head=" << gAllocRecordHead << " count=" << count << ")";
+  LOG(INFO) << "Tracked allocations, (head=" << alloc_record_head_ << " count=" << count << ")";
   while (count--) {
     AllocRecord* record = &recent_allocation_records_[i];
 
@@ -3820,22 +3824,20 @@
       usleep(40000);
     }
 
-    i = (i + 1) & (gAllocRecordMax-1);
+    i = (i + 1) & (alloc_record_max_ - 1);
   }
 }
 
 void Dbg::UpdateObjectPointers(IsMarkedCallback* visitor, void* arg) {
-  {
-    MutexLock mu(Thread::Current(), gAllocTrackerLock);
-    if (recent_allocation_records_ != nullptr) {
-      size_t i = HeadIndex();
-      size_t count = gAllocRecordCount;
-      while (count--) {
-        AllocRecord* record = &recent_allocation_records_[i];
-        DCHECK(record != nullptr);
-        record->UpdateObjectPointers(visitor, arg);
-        i = (i + 1) & (gAllocRecordMax - 1);
-      }
+  if (recent_allocation_records_ != nullptr) {
+    MutexLock mu(Thread::Current(), *alloc_tracker_lock_);
+    size_t i = HeadIndex();
+    size_t count = alloc_record_count_;
+    while (count--) {
+      AllocRecord* record = &recent_allocation_records_[i];
+      DCHECK(record != nullptr);
+      record->UpdateObjectPointers(visitor, arg);
+      i = (i + 1) & (alloc_record_max_ - 1);
     }
   }
   if (gRegistry != nullptr) {
@@ -3941,7 +3943,7 @@
   Thread* self = Thread::Current();
   std::vector<uint8_t> bytes;
   {
-    MutexLock mu(self, gAllocTrackerLock);
+    MutexLock mu(self, *alloc_tracker_lock_);
     //
     // Part 1: generate string tables.
     //
@@ -3949,7 +3951,7 @@
     StringTable method_names;
     StringTable filenames;
 
-    int count = gAllocRecordCount;
+    int count = alloc_record_count_;
     int idx = HeadIndex();
     while (count--) {
       AllocRecord* record = &recent_allocation_records_[idx];
@@ -3967,10 +3969,10 @@
         }
       }
 
-      idx = (idx + 1) & (gAllocRecordMax-1);
+      idx = (idx + 1) & (alloc_record_max_ - 1);
     }
 
-    LOG(INFO) << "allocation records: " << gAllocRecordCount;
+    LOG(INFO) << "allocation records: " << alloc_record_count_;
 
     //
     // Part 2: Generate the output and store it in the buffer.
@@ -3991,14 +3993,14 @@
     // (2b) number of class name strings
     // (2b) number of method name strings
     // (2b) number of source file name strings
-    JDWP::Append2BE(bytes, gAllocRecordCount);
+    JDWP::Append2BE(bytes, alloc_record_count_);
     size_t string_table_offset = bytes.size();
     JDWP::Append4BE(bytes, 0);  // We'll patch this later...
     JDWP::Append2BE(bytes, class_names.Size());
     JDWP::Append2BE(bytes, method_names.Size());
     JDWP::Append2BE(bytes, filenames.Size());
 
-    count = gAllocRecordCount;
+    count = alloc_record_count_;
     idx = HeadIndex();
     while (count--) {
       // For each entry:
@@ -4032,7 +4034,7 @@
         JDWP::Append2BE(bytes, record->stack[stack_frame].LineNumber());
       }
 
-      idx = (idx + 1) & (gAllocRecordMax-1);
+      idx = (idx + 1) & (alloc_record_max_ - 1);
     }
 
     // (xb) class name strings
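
HeadIndex() above converts the alloc-record ring buffer's write cursor into the index of the oldest record: it relies on alloc_record_max_ being a power of two, so '& (max - 1)' acts as a cheap modulo, and adding max before subtracting the count avoids unsigned underflow. A self-contained worked example (OldestRecordIndex is illustrative, not ART code):

    #include <cassert>
    #include <cstddef>

    size_t OldestRecordIndex(size_t head, size_t max, size_t count) {
      // 'head' is the most recently written slot; step back 'count - 1' slots, modulo max.
      return (head + 1 + max - count) & (max - 1);
    }

    int main() {
      assert(OldestRecordIndex(1, 8, 3) == 7);  // Newest in slot 1, 3 records: oldest in slot 7.
      assert(OldestRecordIndex(5, 8, 8) == 6);  // Buffer full: oldest is the slot right after head.
      return 0;
    }
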
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 5d269ee..6c44bde 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -391,7 +391,7 @@
       LOCKS_EXCLUDED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void DisableFullDeoptimization()
-      EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_)
+      LOCKS_EXCLUDED(Locks::deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Manage deoptimization after updating JDWP events list. This must be done while all mutator
@@ -448,8 +448,11 @@
   static void RecordAllocation(mirror::Class* type, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void SetAllocTrackingEnabled(bool enabled);
-  static inline bool IsAllocTrackingEnabled() { return recent_allocation_records_ != NULL; }
+  static bool IsAllocTrackingEnabled() {
+    return recent_allocation_records_ != nullptr;
+  }
   static jbyteArray GetRecentAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static size_t HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(alloc_tracker_lock_);
   static void DumpRecentAllocations();
 
   // Updates the stored direct object pointers (called from SweepSystemWeaks).
@@ -488,7 +491,14 @@
   static void PostThreadStartOrStop(Thread*, uint32_t)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static AllocRecord* recent_allocation_records_;
+  static Mutex* alloc_tracker_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(alloc_tracker_lock_);
+  static size_t alloc_record_max_ GUARDED_BY(alloc_tracker_lock_);
+  static size_t alloc_record_head_ GUARDED_BY(alloc_tracker_lock_);
+  static size_t alloc_record_count_ GUARDED_BY(alloc_tracker_lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(Dbg);
 };
 
 #define CHUNK_TYPE(_name) \
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index a8fb6c1..498ac2c 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -29,7 +29,6 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/throwable.h"
-#include "locks.h"
 #include "object_utils.h"
 #include "sirt_ref.h"
 #include "thread.h"
@@ -642,8 +641,7 @@
 }
 
 static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-    UNLOCK_FUNCTION(monitor_lock_) {
+    NO_THREAD_SAFETY_ANALYSIS /* SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) */ {
   // Save any pending exception over monitor exit call.
   mirror::Throwable* saved_exception = NULL;
   ThrowLocation saved_throw_location;
diff --git a/runtime/entrypoints/portable/portable_jni_entrypoints.cc b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
index de1e32e..17ad4d0 100644
--- a/runtime/entrypoints/portable/portable_jni_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
@@ -23,7 +23,7 @@
 
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
 extern "C" uint32_t art_portable_jni_method_start(Thread* self)
-    UNLOCK_FUNCTION(GlobalSynchronizatio::mutator_lock_) {
+    UNLOCK_FUNCTION(Locks::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
   uint32_t saved_local_ref_cookie = env->local_ref_cookie;
   env->local_ref_cookie = env->locals.GetSegmentState();
@@ -32,7 +32,7 @@
 }
 
 extern "C" uint32_t art_portable_jni_method_start_synchronized(jobject to_lock, Thread* self)
-    UNLOCK_FUNCTION(Locks::mutator_lock_) {
+    UNLOCK_FUNCTION(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS {
   self->DecodeJObject(to_lock)->MonitorEnter(self);
   return art_portable_jni_method_start(self);
 }
diff --git a/runtime/entrypoints/portable/portable_lock_entrypoints.cc b/runtime/entrypoints/portable/portable_lock_entrypoints.cc
index 44d3da9..358ac23 100644
--- a/runtime/entrypoints/portable/portable_lock_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_lock_entrypoints.cc
@@ -20,8 +20,9 @@
 namespace art {
 
 extern "C" void art_portable_lock_object_from_code(mirror::Object* obj, Thread* thread)
-    EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
-  DCHECK(obj != NULL);        // Assumed to have been checked before entry.
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
+  DCHECK(obj != nullptr);        // Assumed to have been checked before entry.
   obj->MonitorEnter(thread);  // May block.
   DCHECK(thread->HoldsLock(obj));
   // Only possible exception is NPE and is handled before entry.
@@ -29,8 +30,9 @@
 }
 
 extern "C" void art_portable_unlock_object_from_code(mirror::Object* obj, Thread* thread)
-    UNLOCK_FUNCTION(monitor_lock_) {
-  DCHECK(obj != NULL);  // Assumed to have been checked before entry.
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
+  DCHECK(obj != nullptr);  // Assumed to have been checked before entry.
   // MonitorExit may throw exception.
   obj->MonitorExit(thread);
 }
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index 8f70049..3fd4adc 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -26,8 +26,8 @@
 }  // namespace mirror
 
 // Place a special frame at the TOS that will save the callee saves for the given type.
-static void FinishCalleeSaveFrameSetup(Thread* self, mirror::ArtMethod** sp,
-                                       Runtime::CalleeSaveType type)
+static inline void FinishCalleeSaveFrameSetup(Thread* self, mirror::ArtMethod** sp,
+                                              Runtime::CalleeSaveType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Be aware the store below may well stomp on an incoming argument.
   Locks::mutator_lock_->AssertSharedHeld(self);
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 5bc7f4c..817d053 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -21,7 +21,8 @@
 namespace art {
 
 extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
-    EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   if (UNLIKELY(obj == NULL)) {
     ThrowLocation throw_location(self->GetCurrentLocationForThrow());
@@ -42,7 +43,8 @@
 }
 
 extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
-    UNLOCK_FUNCTION(monitor_lock_) {
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   if (UNLIKELY(obj == NULL)) {
     ThrowLocation throw_location(self->GetCurrentLocationForThrow());
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index bb4d1d7..8b7bfd3 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_
 #define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_
 
+#include "base/mutex.h"
 #include "globals.h"
-#include "locks.h"
 #include "mem_map.h"
 #include "UniquePtr.h"
 
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index dde1425..7cfeb63 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -19,7 +19,6 @@
 
 #include "base/logging.h"
 #include "gc_allocator.h"
-#include "locks.h"
 #include "object_callbacks.h"
 #include "space_bitmap.h"
 
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 06127c1..8871921 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -175,7 +175,6 @@
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  // TODO: Fixme when anotatalysis works with visitors.
   void operator()(Object* obj, Object* ref,
                   const MemberOffset& /* offset */, bool /* is_static */) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 3c4b674..5fd2bce 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -17,9 +17,9 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 
+#include "base/mutex.h"
 #include "gc_allocator.h"
 #include "globals.h"
-#include "locks.h"
 #include "mem_map.h"
 #include "object_callbacks.h"
 #include "UniquePtr.h"
@@ -248,8 +248,7 @@
     contained_ = space_set.contained_;
   }
 
-  void Walk(ObjectCallback* callback, void* arg)
-      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
+  void Walk(ObjectCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   template <typename Visitor>
   void Visit(const Visitor& visitor) NO_THREAD_SAFETY_ANALYSIS {
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 088f1d4..8d401b8 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -18,10 +18,10 @@
 #define ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 
 #include "base/histogram.h"
+#include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "gc/gc_cause.h"
 #include "gc_type.h"
-#include "locks.h"
 #include <stdint.h>
 #include <vector>
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 4aff68a..71424bd 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -1347,9 +1347,6 @@
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
 
-  timings_.NewSplit("RequestHeapTrim");
-  heap->RequestHeapTrim();
-
   // Update the cumulative statistics
   total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
   total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 5c0a233..8d40c34 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -114,7 +114,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsImmuneSpace(const space::ContinuousSpace* space) const;
+  bool IsImmuneSpace(const space::ContinuousSpace* space) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
@@ -152,6 +152,7 @@
 
   // Sweep only pointers within an array. WARNING: Trashes objects.
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Blackens an object.
diff --git a/runtime/gc/collector/partial_mark_sweep.h b/runtime/gc/collector/partial_mark_sweep.h
index 44ae9e9..ac0d068 100644
--- a/runtime/gc/collector/partial_mark_sweep.h
+++ b/runtime/gc/collector/partial_mark_sweep.h
@@ -17,7 +17,6 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_PARTIAL_MARK_SWEEP_H_
 #define ART_RUNTIME_GC_COLLECTOR_PARTIAL_MARK_SWEEP_H_
 
-#include "locks.h"
 #include "mark_sweep.h"
 
 namespace art {
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index a577f90..2da360f 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -678,13 +678,14 @@
   heap_->DelayReferenceReferent(klass, obj, MarkedForwardingAddressCallback, this);
 }
 
-// Visit all of the references of an object and update.
-void SemiSpace::ScanObject(Object* obj) {
-  DCHECK(obj != NULL);
-  DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
-  MarkSweep::VisitObjectReferences(obj, [this](Object* obj, Object* ref, const MemberOffset& offset,
-     bool /* is_static */) ALWAYS_INLINE_LAMBDA NO_THREAD_SAFETY_ANALYSIS {
-    mirror::Object* new_address = MarkObject(ref);
+class SemiSpaceMarkObjectVisitor {
+ public:
+  explicit SemiSpaceMarkObjectVisitor(SemiSpace* semi_space) : semi_space_(semi_space) {
+  }
+
+  void operator()(Object* obj, Object* ref, const MemberOffset& offset, bool /* is_static */)
+      const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) */ {
+    mirror::Object* new_address = semi_space_->MarkObject(ref);
     if (new_address != ref) {
       DCHECK(new_address != nullptr);
       // Don't need to mark the card since we are updating the object address and not changing the
@@ -694,7 +695,17 @@
       // disable check as we could run inside a transaction.
       obj->SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(offset, new_address, false);
     }
-  }, kMovingClasses);
+  }
+ private:
+  SemiSpace* const semi_space_;
+};
+
+// Visit all of the references of an object and update.
+void SemiSpace::ScanObject(Object* obj) {
+  DCHECK(obj != NULL);
+  DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
+  SemiSpaceMarkObjectVisitor visitor(this);
+  MarkSweep::VisitObjectReferences(obj, visitor, kMovingClasses);
   mirror::Class* klass = obj->GetClass<kVerifyNone>();
   if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
     DelayReferenceReferent(klass, obj);
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 98f2b592..934b1bd 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -18,7 +18,6 @@
 #define ART_RUNTIME_GC_COLLECTOR_STICKY_MARK_SWEEP_H_
 
 #include "base/macros.h"
-#include "locks.h"
 #include "partial_mark_sweep.h"
 
 namespace art {
@@ -43,7 +42,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  void Sweep(bool swap_bitmaps) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void Sweep(bool swap_bitmaps) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Don't need to do anything special here since we scan all the cards which may have references
   // to the newly allocated objects.
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 89ded0b..3d2f7ea 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -107,7 +107,7 @@
   // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant since
   // the allocator_type should be constant propagated.
   if (AllocatorMayHaveConcurrentGC(allocator) && concurrent_gc_) {
-    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
+    CheckConcurrentGC(self, new_num_bytes_allocated, &obj);
   }
   VerifyObject(obj);
   self->VerifyStack();
@@ -280,11 +280,13 @@
 }
 
 inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
-                                    mirror::Object* obj) {
+                                    mirror::Object** obj) {
   if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
     // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
-    SirtRef<mirror::Object> ref(self, obj);
+    SirtRef<mirror::Object> ref(self, *obj);
     RequestConcurrentGC(self);
+    // Restore obj in case it moved.
+    *obj = ref.get();
   }
 }
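
The CheckConcurrentGC change above now takes a mirror::Object** instead of a mirror::Object*, because RequestConcurrentGC can act as a safepoint: the thread may suspend and a moving collector may relocate the object, so the caller's raw pointer has to be re-read from the SirtRef afterwards. A minimal self-contained sketch of that handle-and-restore pattern, with hypothetical names (Handle, CheckConcurrentGCSketch) standing in for SirtRef and the real entry point:

struct Object {};

// Stand-in for SirtRef: roots the object so a (hypothetical) moving collector
// can update this slot; get() then returns the possibly-relocated address.
template <typename T>
class Handle {
 public:
  explicit Handle(T* obj) : obj_(obj) {}
  T* get() const { return obj_; }
  void update(T* moved) { obj_ = moved; }  // Invoked by the collector.
 private:
  T* obj_;
};

void CheckConcurrentGCSketch(Object** obj) {
  Handle<Object> ref(*obj);  // Root the object across the possible safepoint.
  // ... request the concurrent GC here; the thread may suspend and the
  // object may move, in which case the collector updates the handle ...
  *obj = ref.get();          // Restore the caller's raw pointer.
}
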
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 87ee21b..2497e6a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -90,6 +90,11 @@
       collector_type_(kCollectorTypeNone),
       post_zygote_collector_type_(post_zygote_collector_type),
       background_collector_type_(background_collector_type),
+      desired_collector_type_(collector_type_),
+      heap_trim_request_lock_(nullptr),
+      heap_trim_target_time_(0),
+      heap_transition_target_time_(0),
+      heap_trim_request_pending_(false),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
       low_memory_mode_(low_memory_mode),
@@ -127,7 +132,6 @@
       verify_mod_union_table_(false),
       verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
       verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
-      last_trim_time_ms_(0),
       allocation_rate_(0),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
        * causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap
@@ -160,16 +164,17 @@
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
   if (!Runtime::Current()->IsZygote()) {
-    ChangeCollector(post_zygote_collector_type_);
+    desired_collector_type_ = post_zygote_collector_type_;
     large_object_threshold_ = kDefaultLargeObjectThreshold;
   } else {
     if (kMovingCollector) {
       // We are the zygote, use bump pointer allocation + semi space collector.
-      ChangeCollector(kCollectorTypeSS);
+      desired_collector_type_ = kCollectorTypeSS;
     } else {
-      ChangeCollector(post_zygote_collector_type_);
+      desired_collector_type_ = post_zygote_collector_type_;
     }
   }
+  ChangeCollector(desired_collector_type_);
 
   live_bitmap_.reset(new accounting::HeapBitmap(this));
   mark_bitmap_.reset(new accounting::HeapBitmap(this));
@@ -274,7 +279,7 @@
   gc_complete_lock_ = new Mutex("GC complete lock");
   gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable",
                                                 *gc_complete_lock_));
-  last_gc_time_ns_ = NanoTime();
+  heap_trim_request_lock_ = new Mutex("Heap trim request lock");
   last_gc_size_ = GetBytesAllocated();
 
   if (ignore_max_footprint_) {
@@ -318,6 +323,16 @@
   }
 }
 
+void Heap::DisableCompaction() {
+  if (IsCompactingGC(post_zygote_collector_type_)) {
+    post_zygote_collector_type_ = kCollectorTypeCMS;
+  }
+  if (IsCompactingGC(background_collector_type_)) {
+    background_collector_type_ = post_zygote_collector_type_;
+  }
+  TransitionCollector(post_zygote_collector_type_);
+}
+
 std::string Heap::SafeGetClassDescriptor(mirror::Class* klass) {
   if (!IsValidContinuousSpaceObjectAddress(klass)) {
     return StringPrintf("<non heap address klass %p>", klass);
@@ -442,12 +457,12 @@
   if (process_state_ != process_state) {
     process_state_ = process_state;
     if (process_state_ == kProcessStateJankPerceptible) {
-      TransitionCollector(post_zygote_collector_type_);
+      // Transition back to foreground right away to prevent jank.
+      RequestHeapTransition(post_zygote_collector_type_, 0);
     } else {
-      TransitionCollector(background_collector_type_);
+      // Don't delay for debug builds since we may want to stress test the GC.
+      RequestHeapTransition(background_collector_type_, kIsDebugBuild ? 0 : kHeapTransitionWait);
     }
-  } else {
-    CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
   }
 }
 
@@ -844,9 +859,40 @@
   self->ThrowOutOfMemoryError(oss.str().c_str());
 }
 
+void Heap::DoPendingTransitionOrTrim() {
+  Thread* self = Thread::Current();
+  CollectorType desired_collector_type;
+  // Wait until we reach the desired transition time.
+  while (true) {
+    uint64_t wait_time;
+    {
+      MutexLock mu(self, *heap_trim_request_lock_);
+      desired_collector_type = desired_collector_type_;
+      uint64_t current_time = NanoTime();
+      if (current_time >= heap_transition_target_time_) {
+        break;
+      }
+      wait_time = heap_transition_target_time_ - current_time;
+    }
+    ScopedThreadStateChange tsc(self, kSleeping);
+    usleep(wait_time / 1000);  // Usleep takes microseconds.
+  }
+  // Transition the heap if the desired collector type is not the same as the current collector type.
+  TransitionCollector(desired_collector_type);
+  // Do a heap trim if it is needed.
+  Trim();
+}
+
 void Heap::Trim() {
   Thread* self = Thread::Current();
   {
+    MutexLock mu(self, *heap_trim_request_lock_);
+    if (!heap_trim_request_pending_ || NanoTime() < heap_trim_target_time_) {
+      return;
+    }
+    heap_trim_request_pending_ = false;
+  }
+  {
     // Need to do this before acquiring the locks since we don't want to get suspended while
     // holding any locks.
     ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
@@ -1731,6 +1777,7 @@
   collector->Run(gc_cause, clear_soft_references);
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
+  RequestHeapTrim(Heap::kHeapTrimWait);
   // Enqueue cleared references.
   EnqueueClearedReferences();
   // Grow the heap so that we know when to perform the next GC.
@@ -2493,7 +2540,20 @@
   }
 }
 
-void Heap::RequestHeapTrim() {
+void Heap::RequestHeapTransition(CollectorType desired_collector_type, uint64_t delta_time) {
+  Thread* self = Thread::Current();
+  {
+    MutexLock mu(self, *heap_trim_request_lock_);
+    if (desired_collector_type_ == desired_collector_type) {
+      return;
+    }
+    heap_transition_target_time_ = std::max(heap_transition_target_time_, NanoTime() + delta_time);
+    desired_collector_type_ = desired_collector_type;
+  }
+  SignalHeapTrimDaemon(self);
+}
+
+void Heap::RequestHeapTrim(uint64_t delta_time) {
   // GC completed and now we must decide whether to request a heap trim (advising pages back to the
   // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans
   // a space it will hold its lock and can become a cause of jank.
@@ -2506,11 +2566,6 @@
   // to utilization (which is probably inversely proportional to how much benefit we can expect).
   // We could try mincore(2) but that's only a measure of how many pages we haven't given away,
   // not how much use we're making of those pages.
-  uint64_t ms_time = MilliTime();
-  // Don't bother trimming the alloc space if a heap trim occurred in the last two seconds.
-  if (ms_time - last_trim_time_ms_ < 2 * 1000) {
-    return;
-  }
 
   Thread* self = Thread::Current();
   Runtime* runtime = Runtime::Current();
@@ -2521,19 +2576,27 @@
     return;
   }
 
-  last_trim_time_ms_ = ms_time;
-
-  // Trim only if we do not currently care about pause times.
+  // Request a heap trim only if we do not currently care about pause times.
   if (!CareAboutPauseTimes()) {
-    JNIEnv* env = self->GetJniEnv();
-    DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
-    DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != NULL);
-    env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
-                              WellKnownClasses::java_lang_Daemons_requestHeapTrim);
-    CHECK(!env->ExceptionCheck());
+    {
+      MutexLock mu(self, *heap_trim_request_lock_);
+      heap_trim_target_time_ = std::max(heap_trim_target_time_, NanoTime() + delta_time);
+      heap_trim_request_pending_ = true;
+    }
+    // Notify the daemon thread which will actually do the heap trim.
+    SignalHeapTrimDaemon(self);
   }
 }
 
+void Heap::SignalHeapTrimDaemon(Thread* self) {
+  JNIEnv* env = self->GetJniEnv();
+  DCHECK(WellKnownClasses::java_lang_Daemons != nullptr);
+  DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != nullptr);
+  env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
+                            WellKnownClasses::java_lang_Daemons_requestHeapTrim);
+  CHECK(!env->ExceptionCheck());
+}
+
 void Heap::RevokeThreadLocalBuffers(Thread* thread) {
   if (rosalloc_space_ != nullptr) {
     rosalloc_space_->RevokeThreadLocalBuffers(thread);
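
Taken together, the heap.cc changes above replace the old "trim at most every two seconds, directly after GC" logic with a request queue served by the heap-trim daemon: RequestHeapTrim and RequestHeapTransition only record a target time under heap_trim_request_lock_ and signal the daemon, and DoPendingTransitionOrTrim waits out the delay before doing the work. A compact self-contained sketch of that scheduling pattern, using standard library primitives in place of the Java Daemons thread and hypothetical names (TrimScheduler, DoTrim):

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <mutex>

class TrimScheduler {
 public:
  using Clock = std::chrono::steady_clock;

  // Called after a GC (stands in for Heap::RequestHeapTrim(delta_time)).
  void RequestTrim(Clock::duration delay) {
    {
      std::lock_guard<std::mutex> lock(lock_);
      target_time_ = std::max(target_time_, Clock::now() + delay);
      pending_ = true;
    }
    cv_.notify_one();  // Stands in for SignalHeapTrimDaemon().
  }

  // Daemon body (stands in for DoPendingTransitionOrTrim() calling Trim()).
  void RunOnce() {
    std::unique_lock<std::mutex> lock(lock_);
    cv_.wait(lock, [this] { return pending_; });
    // Honor the requested delay; a later request may push target_time_ out.
    while (Clock::now() < target_time_) {
      cv_.wait_until(lock, target_time_);
    }
    pending_ = false;
    lock.unlock();
    DoTrim();  // Placeholder for releasing unused pages back to the OS.
  }

 private:
  void DoTrim() {}
  std::mutex lock_;
  std::condition_variable cv_;
  Clock::time_point target_time_{};
  bool pending_ = false;
};
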
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 88adf81..4c4e943 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -31,7 +31,6 @@
 #include "globals.h"
 #include "gtest/gtest.h"
 #include "jni.h"
-#include "locks.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "reference_queue.h"
@@ -135,6 +134,10 @@
   // Used so that we don't overflow the allocation time atomic integer.
   static constexpr size_t kTimeAdjust = 1024;
 
+  // How long we wait after a GC to perform a heap trim (nanoseconds).
+  static constexpr uint64_t kHeapTrimWait = MsToNs(5000);
+  static constexpr uint64_t kHeapTransitionWait = MsToNs(5000);
+
   // Create a heap with the requested sizes. The possible empty
   // image_file_names names specify Spaces to load based on
   // ImageWriter output.
@@ -437,8 +440,12 @@
 
   void DumpForSigQuit(std::ostream& os);
 
+
+  // Do a pending heap transition or trim.
+  void DoPendingTransitionOrTrim() LOCKS_EXCLUDED(heap_trim_request_lock_);
+
   // Trim the managed and native heaps by releasing unused memory back to the OS.
-  void Trim();
+  void Trim() LOCKS_EXCLUDED(heap_trim_request_lock_);
 
   void RevokeThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
@@ -487,6 +494,9 @@
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
 
+  // Permanently disable compaction.
+  void DisableCompaction();
+
   space::DlMallocSpace* GetDlMallocSpace() const {
     return dlmalloc_space_;
   }
@@ -572,7 +582,8 @@
   bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
-                                       mirror::Object* obj);
+                                       mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // We don't force this to be inlined since it is a slow path.
   template <bool kInstrumented, typename PreFenceVisitor>
@@ -636,7 +647,9 @@
   collector::GcType WaitForGcToCompleteLocked(Thread* self)
       EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_);
 
-  void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
+  void RequestHeapTransition(CollectorType desired_collector_type, uint64_t delta_time)
+      LOCKS_EXCLUDED(heap_trim_request_lock_);
+  void RequestHeapTrim(uint64_t delta_time) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   bool IsGCRequestPending() const;
 
@@ -670,7 +683,7 @@
   void RemoveSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   static void VerificationCallback(mirror::Object* obj, void* arg)
-      SHARED_LOCKS_REQUIRED(GlobalSychronization::heap_bitmap_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Swap the allocation stack with the live stack.
   void SwapStacks(Thread* self);
@@ -678,6 +691,10 @@
   // Clear cards and update the mod union table.
   void ProcessCards(TimingLogger& timings);
 
+  // Signal the heap trim daemon that there is something to do, either a heap transition or heap
+  // trim.
+  void SignalHeapTrimDaemon(Thread* self);
+
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object* obj);
 
@@ -730,6 +747,17 @@
   CollectorType post_zygote_collector_type_;
   // Which collector we will use when the app is notified of a transition to background.
   CollectorType background_collector_type_;
+  // Desired collector type; the heap trimming daemon transitions the heap if it is != collector_type_.
+  CollectorType desired_collector_type_;
+
+  // Lock which guards heap trim requests.
+  Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // When we want to perform the next heap trim (nanoseconds).
+  uint64_t heap_trim_target_time_ GUARDED_BY(heap_trim_request_lock_);
+  // When we want to perform the next heap transition (nanoseconds).
+  uint64_t heap_transition_target_time_ GUARDED_BY(heap_trim_request_lock_);
+  // If we have a heap trim request pending.
+  bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_);
 
   // How many GC threads we may use for paused parts of garbage collection.
   const size_t parallel_gc_threads_;
@@ -851,9 +879,6 @@
   // Parallel GC data structures.
   UniquePtr<ThreadPool> thread_pool_;
 
-  // The last time a heap trim occurred.
-  uint64_t last_trim_time_ms_;
-
   // The nanosecond time at which the last GC ended.
   uint64_t last_gc_time_ns_;
 
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index e12a95f..99314ba 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -26,7 +26,6 @@
 #include "globals.h"
 #include "gtest/gtest.h"
 #include "jni.h"
-#include "locks.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "thread_pool.h"
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 2c9d35f..031fccd 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -146,9 +146,6 @@
   byte* AllocBlock(size_t bytes) EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
   void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
 
-  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
-      EXCLUSIVE_LOCKS_REQUIRED(lock_);
-
   // The main block is an unbounded block where objects go when there are no other blocks. This
   // enables us to maintain tightly packed objects when you are not using thread local buffers for
   // allocation. The main block starts at the space Begin().
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 413fc1d..ea0d290 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -75,7 +75,7 @@
   void SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space);
 };
 
-static size_t test_rand(size_t* seed) {
+static inline size_t test_rand(size_t* seed) {
   *seed = *seed * 1103515245 + 12345;
   return *seed;
 }
diff --git a/runtime/globals.h b/runtime/globals.h
index 83e3028..5bc4b91 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -99,6 +99,9 @@
 static constexpr bool kUseBrooksPointer = false;
 #endif
 
+// If true, references within the heap are poisoned (negated).
+static constexpr bool kPoisonHeapReferences = false;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 54c7b6e..c8855e3 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -21,11 +21,38 @@
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "utils.h"
+#include "verify_object-inl.h"
 
 #include <cstdlib>
 
 namespace art {
 
+template<typename T>
+class MutatorLockedDumpable {
+ public:
+  explicit MutatorLockedDumpable(T& value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : value_(value) {
+  }
+
+  void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    value_.Dump(os);
+  }
+
+ private:
+  T& value_;
+
+  DISALLOW_COPY_AND_ASSIGN(MutatorLockedDumpable);
+};
+
+template<typename T>
+std::ostream& operator<<(std::ostream& os, const MutatorLockedDumpable<T>& rhs)
+// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) however annotalysis
+//       currently fails for this.
+    NO_THREAD_SAFETY_ANALYSIS {
+  rhs.Dump(os);
+  return os;
+}
+
 static void AbortMaybe() {
   // If -Xcheck:jni is on, it'll give a more detailed error before aborting.
   if (!Runtime::Current()->GetJavaVM()->check_jni) {
@@ -81,8 +108,7 @@
   size_t topIndex = segment_state_.parts.topIndex;
 
   CHECK(obj != NULL);
-  // TODO: stronger sanity check on the object (such as in heap)
-  DCHECK_ALIGNED(reinterpret_cast<uintptr_t>(obj), 8);
+  VerifyObject(obj);
   DCHECK(table_ != NULL);
   DCHECK_LE(alloc_entries_, max_entries_);
   DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
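
The MutatorLockedDumpable helper added above exists so that a Dump() method annotated with SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) can still be streamed with operator<<, whose own annotation annotalysis currently rejects (see the TODO in the hunk). A stripped-down sketch of the same wrapper idiom, with a hypothetical Table type and the lock annotations omitted:

#include <iostream>

struct Table {
  // Imagine this requires a lock that the caller is known to hold.
  void Dump(std::ostream& os) const { os << "3 entries"; }
};

template <typename T>
class Dumpable {
 public:
  explicit Dumpable(const T& value) : value_(value) {}
  void Dump(std::ostream& os) const { value_.Dump(os); }
 private:
  const T& value_;
};

template <typename T>
std::ostream& operator<<(std::ostream& os, const Dumpable<T>& rhs) {
  rhs.Dump(os);
  return os;
}

int main() {
  Table locals;
  std::cout << Dumpable<Table>(locals) << "\n";  // Prints "3 entries".
  return 0;
}
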
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 9d2fa35..9a8e4f2 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -23,6 +23,7 @@
 #include <string>
 
 #include "base/logging.h"
+#include "base/mutex.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index e04d7b2..e9356e0 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -19,7 +19,7 @@
 
 #include "atomic.h"
 #include "base/macros.h"
-#include "locks.h"
+#include "base/mutex.h"
 
 #include <stdint.h>
 #include <set>
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index fd921f3..7dd06c6 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -17,12 +17,11 @@
 #ifndef ART_RUNTIME_INTERN_TABLE_H_
 #define ART_RUNTIME_INTERN_TABLE_H_
 
-#include "base/mutex.h"
-#include "locks.h"
-#include "object_callbacks.h"
-
 #include <map>
 
+#include "base/mutex.h"
+#include "object_callbacks.h"
+
 namespace art {
 
 enum VisitRootFlags : uint8_t;
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index efe11fc..0750eb5 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_H_
 #define ART_RUNTIME_INTERPRETER_INTERPRETER_H_
 
+#include "base/mutex.h"
 #include "dex_file.h"
-#include "locks.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index fdbdfeb..fec0e31 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -31,11 +31,13 @@
 struct iovec;
 
 namespace art {
-  union JValue;
+
+union JValue;
+class Thread;
+
 namespace mirror {
   class ArtMethod;
 }  // namespace mirror
-class Thread;
 
 namespace JDWP {
 
@@ -156,7 +158,7 @@
   // ObjectId GetWaitForEventThread();
   void SetWaitForEventThread(ObjectId threadId)
       LOCKS_EXCLUDED(event_thread_lock_, process_request_lock_);
-  void ClearWaitForEventThread() LOCKS_EXCLUDED(event_thread_lock);
+  void ClearWaitForEventThread() LOCKS_EXCLUDED(event_thread_lock_);
 
   /*
    * These notify the debug code that something interesting has happened.  This
@@ -334,6 +336,7 @@
 
   // Linked list of events requested by the debugger (breakpoints, class prep, etc).
   Mutex event_list_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
   JdwpEvent* event_list_ GUARDED_BY(event_list_lock_);
   size_t event_list_size_ GUARDED_BY(event_list_lock_);  // Number of elements in event_list_.
   size_t full_deoptimization_requests_ GUARDED_BY(event_list_lock_);  // Number of events requiring
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 1bcb8dd..4fad5c9 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2466,8 +2466,7 @@
     return JNI_OK;
   }
 
-  static jint MonitorEnter(JNIEnv* env, jobject java_object)
-      EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
+  static jint MonitorEnter(JNIEnv* env, jobject java_object) NO_THREAD_SAFETY_ANALYSIS {
     CHECK_NON_NULL_ARGUMENT(MonitorEnter, java_object);
     ScopedObjectAccess soa(env);
     mirror::Object* o = soa.Decode<mirror::Object*>(java_object);
@@ -2479,8 +2478,7 @@
     return JNI_OK;
   }
 
-  static jint MonitorExit(JNIEnv* env, jobject java_object)
-      UNLOCK_FUNCTION(monitor_lock_) {
+  static jint MonitorExit(JNIEnv* env, jobject java_object) NO_THREAD_SAFETY_ANALYSIS {
     CHECK_NON_NULL_ARGUMENT(MonitorExit, java_object);
     ScopedObjectAccess soa(env);
     mirror::Object* o = soa.Decode<mirror::Object*>(java_object);
diff --git a/runtime/locks.cc b/runtime/locks.cc
deleted file mode 100644
index 246e339..0000000
--- a/runtime/locks.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "locks.h"
-
-#include "base/mutex.h"
-
-namespace art {
-
-Mutex* Locks::abort_lock_ = NULL;
-Mutex* Locks::breakpoint_lock_ = NULL;
-Mutex* Locks::deoptimization_lock_ = NULL;
-ReaderWriterMutex* Locks::classlinker_classes_lock_ = NULL;
-ReaderWriterMutex* Locks::heap_bitmap_lock_ = NULL;
-Mutex* Locks::logging_lock_ = NULL;
-ReaderWriterMutex* Locks::mutator_lock_ = NULL;
-Mutex* Locks::runtime_shutdown_lock_ = NULL;
-Mutex* Locks::thread_list_lock_ = NULL;
-Mutex* Locks::thread_suspend_count_lock_ = NULL;
-Mutex* Locks::trace_lock_ = NULL;
-Mutex* Locks::profiler_lock_ = NULL;
-Mutex* Locks::unexpected_signal_lock_ = NULL;
-Mutex* Locks::intern_table_lock_ = NULL;
-
-void Locks::Init() {
-  if (logging_lock_ != NULL) {
-    // Already initialized.
-    DCHECK(abort_lock_ != NULL);
-    DCHECK(breakpoint_lock_ != NULL);
-    DCHECK(deoptimization_lock_ != NULL);
-    DCHECK(classlinker_classes_lock_ != NULL);
-    DCHECK(heap_bitmap_lock_ != NULL);
-    DCHECK(logging_lock_ != NULL);
-    DCHECK(mutator_lock_ != NULL);
-    DCHECK(thread_list_lock_ != NULL);
-    DCHECK(thread_suspend_count_lock_ != NULL);
-    DCHECK(trace_lock_ != NULL);
-    DCHECK(profiler_lock_ != NULL);
-    DCHECK(unexpected_signal_lock_ != NULL);
-    DCHECK(intern_table_lock_ != NULL);
-  } else {
-    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
-    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
-
-    DCHECK(breakpoint_lock_ == NULL);
-    breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock);
-    DCHECK(deoptimization_lock_ == NULL);
-    deoptimization_lock_ = new Mutex("deoptimization lock", kDeoptimizationLock);
-    DCHECK(classlinker_classes_lock_ == NULL);
-    classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
-                                                      kClassLinkerClassesLock);
-    DCHECK(heap_bitmap_lock_ == NULL);
-    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
-    DCHECK(mutator_lock_ == NULL);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
-    DCHECK(runtime_shutdown_lock_ == NULL);
-    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock);
-    DCHECK(thread_list_lock_ == NULL);
-    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
-    DCHECK(thread_suspend_count_lock_ == NULL);
-    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
-    DCHECK(trace_lock_ == NULL);
-    trace_lock_ = new Mutex("trace lock", kTraceLock);
-    DCHECK(profiler_lock_ == NULL);
-    profiler_lock_ = new Mutex("profiler lock", kProfilerLock);
-    DCHECK(unexpected_signal_lock_ == NULL);
-    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
-    DCHECK(intern_table_lock_ == NULL);
-    intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock);
-  }
-}
-
-}  // namespace art
diff --git a/runtime/locks.h b/runtime/locks.h
deleted file mode 100644
index 4343ab4..0000000
--- a/runtime/locks.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_LOCKS_H_
-#define ART_RUNTIME_LOCKS_H_
-
-#include <ostream>
-
-#include "base/macros.h"
-
-namespace art {
-
-class LOCKABLE Mutex;
-class LOCKABLE ReaderWriterMutex;
-
-// LockLevel is used to impose a lock hierarchy [1] where acquisition of a Mutex at a higher or
-// equal level to a lock a thread holds is invalid. The lock hierarchy achieves a cycle free
-// partial ordering and thereby cause deadlock situations to fail checks.
-//
-// [1] http://www.drdobbs.com/parallel/use-lock-hierarchies-to-avoid-deadlock/204801163
-enum LockLevel {
-  kLoggingLock = 0,
-  kUnexpectedSignalLock,
-  kThreadSuspendCountLock,
-  kAbortLock,
-  kJdwpSocketLock,
-  kRosAllocGlobalLock,
-  kRosAllocBracketLock,
-  kRosAllocBulkFreeLock,
-  kAllocSpaceLock,
-  kDexFileMethodInlinerLock,
-  kDexFileToMethodInlinerMapLock,
-  kMarkSweepMarkStackLock,
-  kTransactionLogLock,
-  kInternTableLock,
-  kMonitorPoolLock,
-  kDefaultMutexLevel,
-  kMarkSweepLargeObjectLock,
-  kPinTableLock,
-  kLoadLibraryLock,
-  kJdwpObjectRegistryLock,
-  kClassLinkerClassesLock,
-  kBreakpointLock,
-  kMonitorLock,
-  kThreadListLock,
-  kBreakpointInvokeLock,
-  kDeoptimizationLock,
-  kTraceLock,
-  kProfilerLock,
-  kJdwpEventListLock,
-  kJdwpAttachLock,
-  kJdwpStartLock,
-  kRuntimeShutdownLock,
-  kHeapBitmapLock,
-  kMutatorLock,
-  kZygoteCreationLock,
-
-  kLockLevelCount  // Must come last.
-};
-std::ostream& operator<<(std::ostream& os, const LockLevel& rhs);
-
-// Global mutexes corresponding to the levels above.
-class Locks {
- public:
-  static void Init();
-
-  // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block
-  // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds
-  // a share on the mutator_lock_. The garbage collector may also execute with shared access but
-  // at times requires exclusive access to the heap (not to be confused with the heap meta-data
-  // guarded by the heap_lock_ below). When the garbage collector requires exclusive access it asks
-  // the mutators to suspend themselves which also involves usage of the thread_suspend_count_lock_
-  // to cover weaknesses in using ReaderWriterMutexes with ConditionVariables. We use a condition
-  // variable to wait upon in the suspension logic as releasing and then re-acquiring a share on
-  // the mutator lock doesn't necessarily allow the exclusive user (e.g the garbage collector)
-  // chance to acquire the lock.
-  //
-  // Thread suspension:
-  // Shared users                                  | Exclusive user
-  // (holding mutator lock and in kRunnable state) |   .. running ..
-  //   .. running ..                               | Request thread suspension by:
-  //   .. running ..                               |   - acquiring thread_suspend_count_lock_
-  //   .. running ..                               |   - incrementing Thread::suspend_count_ on
-  //   .. running ..                               |     all mutator threads
-  //   .. running ..                               |   - releasing thread_suspend_count_lock_
-  //   .. running ..                               | Block trying to acquire exclusive mutator lock
-  // Poll Thread::suspend_count_ and enter full    |   .. blocked ..
-  // suspend code.                                 |   .. blocked ..
-  // Change state to kSuspended                    |   .. blocked ..
-  // x: Release share on mutator_lock_             | Carry out exclusive access
-  // Acquire thread_suspend_count_lock_            |   .. exclusive ..
-  // while Thread::suspend_count_ > 0              |   .. exclusive ..
-  //   - wait on Thread::resume_cond_              |   .. exclusive ..
-  //     (releases thread_suspend_count_lock_)     |   .. exclusive ..
-  //   .. waiting ..                               | Release mutator_lock_
-  //   .. waiting ..                               | Request thread resumption by:
-  //   .. waiting ..                               |   - acquiring thread_suspend_count_lock_
-  //   .. waiting ..                               |   - decrementing Thread::suspend_count_ on
-  //   .. waiting ..                               |     all mutator threads
-  //   .. waiting ..                               |   - notifying on Thread::resume_cond_
-  //    - re-acquire thread_suspend_count_lock_    |   - releasing thread_suspend_count_lock_
-  // Release thread_suspend_count_lock_            |  .. running ..
-  // Acquire share on mutator_lock_                |  .. running ..
-  //  - This could block but the thread still      |  .. running ..
-  //    has a state of kSuspended and so this      |  .. running ..
-  //    isn't an issue.                            |  .. running ..
-  // Acquire thread_suspend_count_lock_            |  .. running ..
-  //  - we poll here as we're transitioning into   |  .. running ..
-  //    kRunnable and an individual thread suspend |  .. running ..
-  //    request (e.g for debugging) won't try      |  .. running ..
-  //    to acquire the mutator lock (which would   |  .. running ..
-  //    block as we hold the mutator lock). This   |  .. running ..
-  //    poll ensures that if the suspender thought |  .. running ..
-  //    we were suspended by incrementing our      |  .. running ..
-  //    Thread::suspend_count_ and then reading    |  .. running ..
-  //    our state we go back to waiting on         |  .. running ..
-  //    Thread::resume_cond_.                      |  .. running ..
-  // can_go_runnable = Thread::suspend_count_ == 0 |  .. running ..
-  // Release thread_suspend_count_lock_            |  .. running ..
-  // if can_go_runnable                            |  .. running ..
-  //   Change state to kRunnable                   |  .. running ..
-  // else                                          |  .. running ..
-  //   Goto x                                      |  .. running ..
-  //  .. running ..                                |  .. running ..
-  static ReaderWriterMutex* mutator_lock_;
-
-  // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
-  static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
-
-  // Guards shutdown of the runtime.
-  static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
-
-  // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
-  // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
-
-  // Guards breakpoints.
-  static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_);
-
-  // Guards deoptimization requests.
-  static Mutex* deoptimization_lock_ ACQUIRED_AFTER(breakpoint_lock_);
-
-  // Guards trace requests.
-  static Mutex* trace_lock_ ACQUIRED_AFTER(deoptimization_lock_);
-
-  // Guards profile objects.
-  static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_);
-
-  // Guards lists of classes within the class linker.
-  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_);
-
-  // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
-  // doesn't try to hold a higher level Mutex.
-  #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(classlinker_classes_lock_)
-
-  // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
-
-  // Have an exclusive aborting thread.
-  static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
-
-  // Allow mutual exclusion when manipulating Thread::suspend_count_.
-  // TODO: Does the trade-off of a per-thread lock make sense?
-  static Mutex* thread_suspend_count_lock_ ACQUIRED_AFTER(abort_lock_);
-
-  // One unexpected signal at a time lock.
-  static Mutex* unexpected_signal_lock_ ACQUIRED_AFTER(thread_suspend_count_lock_);
-
-  // Have an exclusive logging thread.
-  static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_LOCKS_H_
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index a18e1719..a61698d 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -20,7 +20,6 @@
 #include "class.h"
 #include "dex_file.h"
 #include "invoke_type.h"
-#include "locks.h"
 #include "modifiers.h"
 #include "object.h"
 #include "object_callbacks.h"
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index ded4e0a..4e2c624 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -21,6 +21,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "cutils/atomic-inline.h"
+#include "monitor.h"
 #include "object_reference.h"
 #include "offsets.h"
 #include "runtime.h"
@@ -30,7 +31,6 @@
 
 class ImageWriter;
 class LockWord;
-class Monitor;
 struct ObjectOffsets;
 class Thread;
 template <typename T> class SirtRef;
@@ -64,7 +64,7 @@
 static constexpr bool kCheckFieldAssignments = false;
 
 // C++ mirror of java.lang.Object
-class MANAGED Object {
+class MANAGED LOCKABLE Object {
  public:
   static MemberOffset ClassOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Object, klass_);
@@ -104,9 +104,9 @@
   uint32_t GetLockOwnerThreadId();
 
   mirror::Object* MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
+      EXCLUSIVE_LOCK_FUNCTION();
   bool MonitorExit(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      UNLOCK_FUNCTION(monitor_lock_);
+      UNLOCK_FUNCTION();
   void Notify(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void NotifyAll(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Wait(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
index b30890f..72f281d 100644
--- a/runtime/mirror/object_reference.h
+++ b/runtime/mirror/object_reference.h
@@ -17,7 +17,8 @@
 #ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
 #define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
 
-#include "locks.h"
+#include "base/mutex.h"
+#include "globals.h"
 
 namespace art {
 namespace mirror {
@@ -74,7 +75,7 @@
 
 // References between objects within the managed heap.
 template<class MirrorType>
-class MANAGED HeapReference : public ObjectReference<false, MirrorType> {
+class MANAGED HeapReference : public ObjectReference<kPoisonHeapReferences, MirrorType> {
  public:
   static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -82,7 +83,7 @@
   }
  private:
   HeapReference<MirrorType>(MirrorType* mirror_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : ObjectReference<false, MirrorType>(mirror_ptr) {}
+      : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {}
 };
 
 }  // namespace mirror
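
With the hunk above, HeapReference picks its encoding from the new kPoisonHeapReferences flag in globals.h: when poisoning is enabled, the compressed reference stored in the heap is the negation of the pointer value, so code that dereferences a reference without decoding it trips over an invalid address instead of silently working. A self-contained sketch of the idea, with a hypothetical Ref class (ART stores these in 32 bits; full pointer width is used here for simplicity):

#include <cassert>
#include <cstdint>

template <bool kPoison, typename T>
class Ref {
 public:
  explicit Ref(T* ptr) : ref_(Encode(ptr)) {}
  T* AsPtr() const { return Decode(ref_); }
 private:
  static uintptr_t Encode(T* ptr) {
    uintptr_t bits = reinterpret_cast<uintptr_t>(ptr);
    return kPoison ? ~bits + 1 : bits;  // Two's-complement negation.
  }
  static T* Decode(uintptr_t bits) {
    return reinterpret_cast<T*>(kPoison ? ~bits + 1 : bits);
  }
  uintptr_t ref_;  // The value actually stored in the heap slot.
};

int main() {
  int object = 42;
  Ref</*kPoison=*/true, int> ref(&object);
  assert(ref.AsPtr() == &object);  // Negating twice round-trips the pointer.
  return 0;
}
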
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 64794fe..332aef0 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -650,9 +650,22 @@
   }
 }
 
+// Fool annotalysis into thinking that the lock on obj is acquired.
+static mirror::Object* FakeLock(mirror::Object* obj)
+    EXCLUSIVE_LOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS {
+  return obj;
+}
+
+// Fool annotalysis into thinking that the lock on obj is released.
+static mirror::Object* FakeUnlock(mirror::Object* obj)
+    UNLOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS {
+  return obj;
+}
+
 mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
+  obj = FakeLock(obj);
   uint32_t thread_id = self->GetThreadId();
   size_t contention_count = 0;
   SirtRef<mirror::Object> sirt_obj(self, obj);
@@ -698,24 +711,22 @@
         mon->Lock(self);
         return sirt_obj.get();  // Success!
       }
-      case LockWord::kHashCode: {
+      case LockWord::kHashCode:
         // Inflate with the existing hashcode.
         Inflate(self, nullptr, sirt_obj.get(), lock_word.GetHashCode());
-        break;
-      }
+        continue;  // Start from the beginning.
       default: {
         LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
         return sirt_obj.get();
       }
     }
   }
-  return sirt_obj.get();
 }
 
 bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) {
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
-
+  obj = FakeUnlock(obj);
   LockWord lock_word = obj->GetLockWord();
   SirtRef<mirror::Object> sirt_obj(self, obj);
   switch (lock_word.GetState()) {
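
The FakeLock/FakeUnlock helpers above exist because Monitor::MonitorEnter and MonitorExit are now annotated as acquiring and releasing a capability on obj itself (see the monitor.h hunk below), yet the real locking goes through lock words and inflated monitors that annotalysis cannot follow; calling a no-op function that claims to take or drop the lock keeps the annotated signatures checkable. A self-contained sketch of that idiom, with generic names and the raw clang attributes spelled out (it only has an effect when built with clang and -Wthread-safety):

#if defined(__clang__)
#define LOCKABLE_T __attribute__((lockable))
#define ACQUIRE(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
#define RELEASE(...) __attribute__((unlock_function(__VA_ARGS__)))
#define NO_ANALYSIS __attribute__((no_thread_safety_analysis))
#else
#define LOCKABLE_T
#define ACQUIRE(...)
#define RELEASE(...)
#define NO_ANALYSIS
#endif

struct LOCKABLE_T Obj {};

// No-op helpers that tell the analysis the per-object lock changed hands.
static void FakeLock(Obj* obj) ACQUIRE(obj) NO_ANALYSIS {}
static void FakeUnlock(Obj* obj) RELEASE(obj) NO_ANALYSIS {}

// The annotated entry points: the bodies satisfy annotalysis via the fakes,
// while the real work would manipulate lock words / inflated monitors.
void Enter(Obj* obj) ACQUIRE(obj) {
  FakeLock(obj);
  // ... real thin/fat lock acquisition would go here ...
}

void Exit(Obj* obj) RELEASE(obj) {
  FakeUnlock(obj);
  // ... real unlock would go here ...
}
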
diff --git a/runtime/monitor.h b/runtime/monitor.h
index d0a3a2e..eb07196 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -27,7 +27,6 @@
 #include "atomic.h"
 #include "base/mutex.h"
 #include "object_callbacks.h"
-#include "sirt_ref.h"
 #include "thread_state.h"
 
 namespace art {
@@ -37,6 +36,7 @@
   class Object;
 }  // namespace mirror
 class LockWord;
+template<class T> class SirtRef;
 class Thread;
 class StackVisitor;
 
@@ -58,11 +58,11 @@
       NO_THREAD_SAFETY_ANALYSIS;  // TODO: Reading lock owner without holding lock is racy.
 
   static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj)
-      EXCLUSIVE_LOCK_FUNCTION(monitor_lock_)
+      EXCLUSIVE_LOCK_FUNCTION(obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static bool MonitorExit(Thread* thread, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      UNLOCK_FUNCTION(monitor_lock_);
+      UNLOCK_FUNCTION(obj);
 
   static void Notify(Thread* self, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -178,6 +178,7 @@
   static uint32_t lock_profiling_threshold_;
 
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
   ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_);
 
   // Number of people waiting on the condition.
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index 19e569d..eb7525a 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -18,6 +18,7 @@
 
 #include "base/logging.h"
 #include "base/mutex-inl.h"
+#include "thread-inl.h"
 #include "monitor.h"
 
 namespace art {
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 32f3f4e..82d0fee 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -17,12 +17,15 @@
 #ifndef ART_RUNTIME_MONITOR_POOL_H_
 #define ART_RUNTIME_MONITOR_POOL_H_
 
-#include "monitor.h"
-
-#include "safe_map.h"
-
+#ifdef __LP64__
+#include <bitset>
 #include <stdint.h>
 
+#include "monitor.h"
+#include "runtime.h"
+#include "safe_map.h"
+#endif
+
 namespace art {
 
 // Abstraction to keep monitors small enough to fit in a lock word (32bits). On 32bit systems the
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index f48e8ad..4aa1d10 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -177,6 +177,8 @@
           << targetSdkVersion << "...";
 
       vm->work_around_app_jni_bugs = true;
+      LOG(WARNING) << "Permanently disabling heap compaction due to JNI workarounds";
+      Runtime::Current()->GetHeap()->DisableCompaction();
     }
   }
 }
@@ -204,12 +206,11 @@
 }
 
 static void VMRuntime_trimHeap(JNIEnv*, jobject) {
-  Runtime::Current()->GetHeap()->Trim();
+  Runtime::Current()->GetHeap()->DoPendingTransitionOrTrim();
 }
 
 static void VMRuntime_concurrentGC(JNIEnv* env, jobject) {
-  Thread* self = ThreadForEnv(env);
-  Runtime::Current()->GetHeap()->ConcurrentGC(self);
+  Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env));
 }
 
 typedef std::map<std::string, mirror::String*> StringTable;
diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h
index 794878a..374a80e 100644
--- a/runtime/nth_caller_visitor.h
+++ b/runtime/nth_caller_visitor.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_NTH_CALLER_VISITOR_H_
 #define ART_RUNTIME_NTH_CALLER_VISITOR_H_
 
+#include "base/mutex.h"
 #include "mirror/art_method.h"
-#include "locks.h"
 #include "stack.h"
 
 namespace art {
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 04f1a05..37db462 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -147,7 +147,13 @@
 
   compiler_callbacks_ = nullptr;
   is_zygote_ = false;
-  interpreter_only_ = false;
+  if (kPoisonHeapReferences) {
+    // kPoisonHeapReferences currently only works with the interpreter.
+    // TODO: make it work with the compiler.
+    interpreter_only_ = true;
+  } else {
+    interpreter_only_ = false;
+  }
   is_explicit_gc_disabled_ = false;
 
   long_pause_log_threshold_ = gc::Heap::kDefaultLongPauseLogThreshold;
diff --git a/runtime/profiler.h b/runtime/profiler.h
index e3af47c..6ea6c84 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -22,15 +22,14 @@
 #include <string>
 #include <vector>
 
+#include "barrier.h"
 #include "base/macros.h"
+#include "base/mutex.h"
 #include "globals.h"
 #include "instrumentation.h"
 #include "os.h"
 #include "safe_map.h"
-#include "base/mutex.h"
-#include "locks.h"
 #include "UniquePtr.h"
-#include "barrier.h"
 
 namespace art {
 
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index c9f5bc5..45309c9 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -22,8 +22,8 @@
 #include <string>
 #include <vector>
 
+#include "base/mutex.h"
 #include "object_callbacks.h"
-#include "locks.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index de06fb8..fdbf245 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -94,7 +94,7 @@
       default_imt_(nullptr),
       fault_message_lock_("Fault message lock"),
       fault_message_(""),
-      method_verifiers_lock_("Method verifiers lock"),
+      method_verifier_lock_("Method verifiers lock"),
       threads_being_born_(0),
       shutdown_cond_(new ConditionVariable("Runtime shutdown", *Locks::runtime_shutdown_lock_)),
       shutting_down_(false),
@@ -851,7 +851,7 @@
     }
   }
   {
-    MutexLock mu(Thread::Current(), method_verifiers_lock_);
+    MutexLock mu(Thread::Current(), method_verifier_lock_);
     for (verifier::MethodVerifier* verifier : method_verifiers_) {
       verifier->VisitRoots(callback, arg);
     }
@@ -1043,13 +1043,13 @@
 
 void Runtime::AddMethodVerifier(verifier::MethodVerifier* verifier) {
   DCHECK(verifier != nullptr);
-  MutexLock mu(Thread::Current(), method_verifiers_lock_);
+  MutexLock mu(Thread::Current(), method_verifier_lock_);
   method_verifiers_.insert(verifier);
 }
 
 void Runtime::RemoveMethodVerifier(verifier::MethodVerifier* verifier) {
   DCHECK(verifier != nullptr);
-  MutexLock mu(Thread::Current(), method_verifiers_lock_);
+  MutexLock mu(Thread::Current(), method_verifier_lock_);
   auto it = method_verifiers_.find(verifier);
   CHECK(it != method_verifiers_.end());
   method_verifiers_.erase(it);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 87307ae..65d296a 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -33,7 +33,6 @@
 #include "instruction_set.h"
 #include "instrumentation.h"
 #include "jobject_comparator.h"
-#include "locks.h"
 #include "object_callbacks.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
@@ -471,7 +470,7 @@
   std::string fault_message_ GUARDED_BY(fault_message_lock_);
 
   // Method verifier set, used so that we can update their GC roots.
-  Mutex method_verifiers_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  Mutex method_verifier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::set<verifier::MethodVerifier*> method_verifiers_;
 
   // A non-zero value indicates that a thread has been created but not yet initialized. Guarded by
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
index b22e816..38e652a 100644
--- a/runtime/sirt_ref.h
+++ b/runtime/sirt_ref.h
@@ -20,6 +20,7 @@
 #include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "stack_indirect_reference_table.h"
 #include "thread.h"
 
 namespace art {
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index f7e88cc..66077f9 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -146,9 +146,10 @@
     if (UNLIKELY(!done)) {
       // Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
       Locks::mutator_lock_->SharedUnlock(this);
+    } else {
+      return static_cast<ThreadState>(old_state);
     }
-  } while (UNLIKELY(!done));
-  return static_cast<ThreadState>(old_state);
+  } while (true);
 }
 
 inline void Thread::VerifyStack() {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 0ad0190..a50fa00 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1196,7 +1196,7 @@
   // The "kinds" below are sorted by the frequency we expect to encounter them.
   if (kind == kLocal) {
     IndirectReferenceTable& locals = jni_env_->locals;
-    result = const_cast<mirror::Object*>(locals.Get(ref));
+    result = locals.Get(ref);
   } else if (kind == kSirtOrInvalid) {
     // TODO: make stack indirect reference table lookup more efficient.
     // Check if this is a local reference in the SIRT.
diff --git a/runtime/thread.h b/runtime/thread.h
index c7ab735..2ebc107 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -24,13 +24,13 @@
 #include <string>
 
 #include "base/macros.h"
+#include "base/mutex.h"
 #include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "globals.h"
 #include "jvalue.h"
-#include "locks.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "runtime_stats.h"
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index d311945..bddebbd 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -151,7 +151,8 @@
 
 #if HAVE_TIMED_RWLOCK
 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
-static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAFETY_ANALYSIS __attribute__((noreturn));
+static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) {
   Runtime* runtime = Runtime::Current();
   std::ostringstream ss;
   ss << "Thread suspend timeout\n";
@@ -159,6 +160,7 @@
   ss << "\n";
   runtime->GetThreadList()->DumpLocked(ss);
   LOG(FATAL) << ss.str();
+  exit(0);
 }
 #endif
 
@@ -193,10 +195,10 @@
 
 size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) {
   Thread* self = Thread::Current();
-  if (kIsDebugBuild) {
-    Locks::mutator_lock_->AssertNotExclusiveHeld(self);
-    Locks::thread_list_lock_->AssertNotHeld(self);
-    Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
+  Locks::thread_list_lock_->AssertNotHeld(self);
+  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+  if (kDebugLocking) {
     CHECK_NE(self->GetState(), kRunnable);
   }
 
@@ -273,41 +275,41 @@
 
   VLOG(threads) << *self << " SuspendAll starting...";
 
-  if (kIsDebugBuild) {
-    Locks::mutator_lock_->AssertNotHeld(self);
-    Locks::thread_list_lock_->AssertNotHeld(self);
-    Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+  Locks::mutator_lock_->AssertNotHeld(self);
+  Locks::thread_list_lock_->AssertNotHeld(self);
+  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+  if (kDebugLocking) {
     CHECK_NE(self->GetState(), kRunnable);
   }
   {
     MutexLock mu(self, *Locks::thread_list_lock_);
-    {
-      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-      // Update global suspend all state for attaching threads.
-      ++suspend_all_count_;
-      // Increment everybody's suspend count (except our own).
-      for (const auto& thread : list_) {
-        if (thread == self) {
-          continue;
-        }
-        VLOG(threads) << "requesting thread suspend: " << *thread;
-        thread->ModifySuspendCount(self, +1, false);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    // Update global suspend all state for attaching threads.
+    ++suspend_all_count_;
+    // Increment everybody's suspend count (except our own).
+    for (const auto& thread : list_) {
+      if (thread == self) {
+        continue;
       }
+      VLOG(threads) << "requesting thread suspend: " << *thread;
+      thread->ModifySuspendCount(self, +1, false);
     }
   }
 
   // Block on the mutator lock until all Runnable threads release their share of access.
 #if HAVE_TIMED_RWLOCK
   // Timeout if we wait more than 30 seconds.
-  if (UNLIKELY(!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0))) {
+  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
     UnsafeLogFatalForThreadSuspendAllTimeout(self);
   }
 #else
   Locks::mutator_lock_->ExclusiveLock(self);
 #endif
 
-  // Debug check that all threads are suspended.
-  AssertThreadsAreSuspended(self, self);
+  if (kDebugLocking) {
+    // Debug check that all threads are suspended.
+    AssertThreadsAreSuspended(self, self);
+  }
 
   VLOG(threads) << *self << " SuspendAll complete";
 }
@@ -317,8 +319,10 @@
 
   VLOG(threads) << *self << " ResumeAll starting";
 
-  // Debug check that all threads are suspended.
-  AssertThreadsAreSuspended(self, self);
+  if (kDebugLocking) {
+    // Debug check that all threads are suspended.
+    AssertThreadsAreSuspended(self, self);
+  }
 
   Locks::mutator_lock_->ExclusiveUnlock(self);
   {
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index e98aed9..1a76705 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -86,7 +86,7 @@
 
   // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
   // of the suspend check. Returns how many checkpoints we should expect to run.
-  size_t RunCheckpoint(Closure* checkpoint_function);
+  size_t RunCheckpoint(Closure* checkpoint_function)
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index e8f9afe..b8735a3 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -23,7 +23,6 @@
 #include "barrier.h"
 #include "base/mutex.h"
 #include "closure.h"
-#include "locks.h"
 #include "mem_map.h"
 
 namespace art {
diff --git a/runtime/throw_location.h b/runtime/throw_location.h
index f30aa4e..c171b07 100644
--- a/runtime/throw_location.h
+++ b/runtime/throw_location.h
@@ -19,6 +19,7 @@
 
 #include "object_callbacks.h"
 #include "base/macros.h"
+#include "base/mutex.h"
 
 #include <stdint.h>
 #include <string>
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 68f9540..cf696de 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -19,10 +19,9 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
 #include "primitive.h"
-#include "object_callbacks.h"
 #include "safe_map.h"
 
 #include <list>
diff --git a/runtime/utf.h b/runtime/utf.h
index 5b2289e..29f8499 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_UTF_H_
 
 #include "base/macros.h"
+#include "base/mutex.h"
 
 #include <stddef.h>
 #include <stdint.h>
diff --git a/runtime/verify_object.h b/runtime/verify_object.h
index b39df4a..6640e0d 100644
--- a/runtime/verify_object.h
+++ b/runtime/verify_object.h
@@ -17,10 +17,10 @@
 #ifndef ART_RUNTIME_VERIFY_OBJECT_H_
 #define ART_RUNTIME_VERIFY_OBJECT_H_
 
-#include "locks.h"
-
 #include <stdint.h>
 
+#include "base/macros.h"
+
 namespace art {
 
 namespace mirror {
@@ -52,10 +52,10 @@
 static constexpr VerifyObjectMode kVerifyObjectSupport =
     kDefaultVerifyFlags != 0 ? kVerifyObjectModeFast : kVerifyObjectModeDisabled;
 
-ALWAYS_INLINE inline void VerifyObject(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
+void VerifyObject(mirror::Object* obj) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
 
 // Check that c.getClass() == c.getClass().getClass().
-ALWAYS_INLINE inline bool VerifyClassClass(mirror::Class* c) NO_THREAD_SAFETY_ANALYSIS;
+bool VerifyClassClass(mirror::Class* c) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
 
 }  // namespace art