Merge "Revert "Revert "Revert "Revert "Use the object class as top in reference type propagation"""""
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index c98a5f8..88dc29e 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -183,7 +183,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(112 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(113 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
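
The bump from 112 to 113 tracks the new pReadBarrierSlow entrypoint introduced in the runtime changes below: the test treats QuickEntryPoints as one pointer-sized slot per entrypoint, so each addition grows the struct by exactly one pointer. A minimal sketch of that sizing invariant, using a stand-in struct rather than ART's macro-generated one:

    // Stand-in for the macro-generated QuickEntryPoints: a flat table with one
    // function-pointer slot per entrypoint. Only the sizing arithmetic is real;
    // the member layout here is an illustrative assumption.
    struct QuickEntryPointsToy {
      void* entrypoints[113];  // was 112 before pReadBarrierSlow was added
    };
    static_assert(sizeof(QuickEntryPointsToy) == 113 * sizeof(void*),
                  "one pointer-sized slot per quick entrypoint");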
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 3e6a263..069a7a4 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -386,6 +386,7 @@
     StartAttributeStream("recursive") << std::boolalpha
                                       << invoke->IsRecursive()
                                       << std::noboolalpha;
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
   }
 
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index d6b8aa4..cfb1868 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -24,6 +24,7 @@
 #include "driver/compiler_driver-inl.h"
 #include "driver/dex_compilation_unit.h"
 #include "instruction_simplifier.h"
+#include "intrinsics.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
@@ -356,8 +357,10 @@
   HConstantFolding fold(callee_graph);
   ReferenceTypePropagation type_propagation(callee_graph, handles_);
   InstructionSimplifier simplify(callee_graph, stats_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
 
   HOptimization* optimizations[] = {
+    &intrinsics,
     &dce,
     &fold,
     &type_propagation,
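
Registering IntrinsicsRecognizer here means a callee graph has its intrinsics tagged before it is inlined, which the graph_visualizer change above then surfaces as an "intrinsic" attribute. For context, a sketch of how such an optimizations[] array is typically consumed; the driver shape is an assumption, and the actual loop lives elsewhere in inliner.cc:

    // Assumed driver shape: run each pass once, in array order, over the
    // callee graph before it is merged back into the caller graph.
    // HOptimization comes from the surrounding ART sources.
    static void RunOptimizations(HOptimization** optimizations, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        optimizations[i]->Run();
      }
    }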
diff --git a/runtime/Android.mk b/runtime/Android.mk
index fe79e72..4a944963 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -39,6 +39,7 @@
   base/unix_file/random_access_file_utils.cc \
   check_jni.cc \
   class_linker.cc \
+  class_table.cc \
   common_throws.cc \
   debugger.cc \
   dex_file.cc \
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 2f2654d..be9af98 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -171,6 +171,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 2000110..f6d954f 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -51,7 +51,6 @@
     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     .cfi_adjust_cfa_offset 12
     RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr \rTemp1, [\rTemp1, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kSaveAll Method*.
     str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
@@ -79,7 +78,6 @@
     sub sp, #4                                    @ bottom word will hold Method*
     .cfi_adjust_cfa_offset 4
     RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
     str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
@@ -139,7 +137,6 @@
 .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
-    THIS_LOAD_REQUIRES_READ_BARRIER
      @ rTemp1 is kRefsAndArgs Method*.
     ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
     str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
@@ -171,7 +168,6 @@
     .cfi_adjust_cfa_offset -40
 .endm
 
-
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz   r0, 1f              @ result non-zero branch over
     bx     lr                  @ return
@@ -588,6 +584,59 @@
     bkpt
 END art_quick_check_cast
 
+// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
+.macro POP_REG_NE rReg, offset, rExclude
+    .ifnc \rReg, \rExclude
+        ldr \rReg, [sp, #\offset]   @ restore rReg
+        .cfi_restore \rReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert a read barrier, only used in art_quick_aput_obj.
+     * rObj and rDest are registers; offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER rDest, rObj, offset
+#ifdef USE_READ_BARRIER
+    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
+    .cfi_adjust_cfa_offset 24
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset ip, 16
+    .cfi_rel_offset lr, 20
+    sub sp, #8                      @ push padding
+    .cfi_adjust_cfa_offset 8
+    @ mov r0, r0                    @ pass ref in r0 (no-op for now since parameter ref is unused)
+    .ifnc \rObj, r1
+        mov r1, \rObj               @ pass rObj
+    .endif
+    mov r2, #\offset                @ pass offset
+    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
+    @ No need to unpoison the return value in r0; artReadBarrierSlow() does the unpoisoning.
+    .ifnc \rDest, r0
+        mov \rDest, r0              @ save return value in rDest
+    .endif
+    add sp, #8                      @ pop padding
+    .cfi_adjust_cfa_offset -8
+    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
+    POP_REG_NE r1, 4, \rDest
+    POP_REG_NE r2, 8, \rDest
+    POP_REG_NE r3, 12, \rDest
+    POP_REG_NE ip, 16, \rDest
+    add sp, #20
+    .cfi_adjust_cfa_offset -20
+    pop {lr}                        @ restore lr
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore lr
+#else
+    ldr \rDest, [\rObj, #\offset]
+    UNPOISON_HEAP_REF \rDest
+#endif  // USE_READ_BARRIER
+.endm
+
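
All of the per-architecture READ_BARRIER macros in this change funnel into the same C++ slow path with the (ref, obj, offset) argument order visible in the `bl artReadBarrierSlow` line above. A self-contained toy model of that contract, with a plain forward declaration standing in for art::mirror::Object and no moving-GC forwarding (both assumptions of this sketch):

    #include <cstdint>

    struct Object;  // opaque stand-in for art::mirror::Object

    // Toy model of the slow-path contract: return the reference stored at
    // obj + offset. In ART the collector may return a forwarded (to-space)
    // pointer here; this sketch just performs the plain load.
    extern "C" Object* artReadBarrierSlowToy(Object* /* ref, currently unused */,
                                             Object* obj, uint32_t offset) {
      Object** field = reinterpret_cast<Object**>(
          reinterpret_cast<uintptr_t>(obj) + offset);
      return *field;
    }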
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -609,15 +658,21 @@
     b art_quick_throw_array_bounds
 END art_quick_aput_obj_with_bound_check
 
+#ifdef USE_READ_BARRIER
+    .extern artReadBarrierSlow
+#endif
     .hidden art_quick_aput_obj
 ENTRY art_quick_aput_obj
+#ifdef USE_READ_BARRIER
+    @ The offset to .Ldo_aput_null is too large for cbz due to the expansion of the READ_BARRIER macro.
+    tst r2, r2
+    beq .Ldo_aput_null
+#else
     cbz r2, .Ldo_aput_null
-    ldr r3, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
-    UNPOISON_HEAP_REF r3
-    ldr ip, [r2, #MIRROR_OBJECT_CLASS_OFFSET]
-    UNPOISON_HEAP_REF ip
-    ldr r3, [r3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
-    UNPOISON_HEAP_REF r3
+#endif  // USE_READ_BARRIER
+    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
     cmp r3, ip  @ value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 2ce2a29..0f06727 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -155,6 +155,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6d9b44a..548ab47 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -31,8 +31,6 @@
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kSaveAll].
-    THIS_LOAD_REQUIRES_READ_BARRIER
-
     // Loads appropriate callee-save-method.
     ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
@@ -95,8 +93,6 @@
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefOnly]  .
-    THIS_LOAD_REQUIRES_READ_BARRIER
-
     // Loads appropriate callee-save-method.
     ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
 
@@ -251,7 +247,6 @@
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
 
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
@@ -1119,6 +1114,62 @@
     brk 0                             // We should not return here...
 END art_quick_check_cast
 
+// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
+.macro POP_REG_NE xReg, offset, xExclude
+    .ifnc \xReg, \xExclude
+        ldr \xReg, [sp, #\offset]     // restore xReg
+        .cfi_restore \xReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert a read barrier, only used in art_quick_aput_obj.
+     * xDest, wDest, and xObj are registers; offset is a defined literal such as
+     * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both the x and w versions of the same register to
+     * handle the name mismatch between instructions. This macro uses the lower 32b of the
+     * register when possible.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER xDest, wDest, xObj, offset
+#ifdef USE_READ_BARRIER
+    // Store the registers used in art_quick_aput_obj (x0-x4, LR); the stack stays 16B aligned.
+    stp x0, x1, [sp, #-48]!
+    .cfi_adjust_cfa_offset 48
+    .cfi_rel_offset x0, 0
+    .cfi_rel_offset x1, 8
+    stp x2, x3, [sp, #16]
+    .cfi_rel_offset x2, 16
+    .cfi_rel_offset x3, 24
+    stp x4, xLR, [sp, #32]
+    .cfi_rel_offset x4, 32
+    .cfi_rel_offset x30, 40
+
+    // mov x0, x0                   // pass ref in x0 (no-op for now since parameter ref is unused)
+    .ifnc \xObj, x1
+        mov x1, \xObj               // pass xObj
+    .endif
+    mov w2, #\offset                // pass offset
+    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
+    // No need to unpoison the return value in w0; artReadBarrierSlow() does the unpoisoning.
+    .ifnc \wDest, w0
+        mov \wDest, w0              // save return value in wDest
+    .endif
+
+    // Conditionally restore saved registers
+    POP_REG_NE x0, 0, \xDest
+    POP_REG_NE x1, 8, \xDest
+    POP_REG_NE x2, 16, \xDest
+    POP_REG_NE x3, 24, \xDest
+    POP_REG_NE x4, 32, \xDest
+    ldr xLR, [sp, #40]
+    .cfi_restore x30
+    add sp, sp, #48
+    .cfi_adjust_cfa_offset -48
+#else
+    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
+    UNPOISON_HEAP_REF \wDest
+#endif  // USE_READ_BARRIER
+.endm
+
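
The `ldr \wDest` form in the #else branch above does double duty: heap references are 32 bits wide even on arm64, and writing a w register zero-extends into the corresponding x register. A one-line C++ restatement of that decode step; the flat zero-extension (no heap base register) matches what the assembly does, and treating it as the complete reference model is an assumption of this note:

    #include <cstdint>

    // Heap references are 32 bits; loading one into wN zero-extends to xN,
    // which is all the "decompression" these stubs need.
    inline uint64_t DecodeHeapReference(uint32_t compressed_ref) {
      return static_cast<uint64_t>(compressed_ref);  // zero-extend 32b -> 64b
    }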
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -1146,17 +1197,17 @@
     b art_quick_throw_array_bounds
 END art_quick_aput_obj_with_bound_check
 
+#ifdef USE_READ_BARRIER
+    .extern artReadBarrierSlow
+#endif
 ENTRY art_quick_aput_obj
     cbz x2, .Ldo_aput_null
-    ldr w3, [x0, #MIRROR_OBJECT_CLASS_OFFSET]            // Heap reference = 32b
+    READ_BARRIER x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
                                                          // This also zero-extends to x3
-    UNPOISON_HEAP_REF w3
-    ldr w4, [x2, #MIRROR_OBJECT_CLASS_OFFSET]            // Heap reference = 32b
+    READ_BARRIER x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
                                                          // This also zero-extends to x4
-    UNPOISON_HEAP_REF w4
-    ldr w3, [x3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Heap reference = 32b
+    READ_BARRIER x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET // Heap reference = 32b
                                                          // This also zero-extends to x3
-    UNPOISON_HEAP_REF w3
     cmp w3, w4  // value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index b1aa3ee..f9c5315 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -44,7 +44,8 @@
       entrypoint == kQuickCmpgDouble ||
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
-      entrypoint == kQuickCmplFloat;
+      entrypoint == kQuickCmplFloat ||
+      entrypoint == kQuickReadBarrierSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 09a018e..4e4b91f 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -279,6 +279,8 @@
 
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 2819f92..4d5004f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -79,7 +79,6 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -127,7 +126,6 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     lw $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -219,7 +217,6 @@
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     lw $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -627,6 +624,76 @@
 END art_quick_check_cast
 
     /*
+     * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
+     * nReg is the register number for rReg.
+     */
+.macro POP_REG_NE rReg, nReg, offset, rExclude
+    .ifnc \rReg, \rExclude
+        lw \rReg, \offset($sp)      # restore rReg
+        .cfi_restore \nReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert a read barrier, only used in art_quick_aput_obj.
+     * rObj and rDest are registers; offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER rDest, rObj, offset
+#ifdef USE_READ_BARRIER
+    # Save the registers used in art_quick_aput_obj (a0-a2, t0-t1, t9, ra); 8 words for 16B alignment.
+    addiu  $sp, $sp, -32
+    .cfi_adjust_cfa_offset 32
+    sw     $ra, 28($sp)
+    .cfi_rel_offset 31, 28
+    sw     $t9, 24($sp)
+    .cfi_rel_offset 25, 24
+    sw     $t1, 20($sp)
+    .cfi_rel_offset 9, 20
+    sw     $t0, 16($sp)
+    .cfi_rel_offset 8, 16
+    sw     $a2, 8($sp)              # padding slot at offset 12 (padding can be any slot in the 32B)
+    .cfi_rel_offset 6, 8
+    sw     $a1, 4($sp)
+    .cfi_rel_offset 5, 4
+    sw     $a0, 0($sp)
+    .cfi_rel_offset 4, 0
+
+    # move $a0, $a0                 # pass ref in a0 (no-op for now since parameter ref is unused)
+    .ifnc \rObj, $a1
+        move $a1, \rObj             # pass rObj
+    .endif
+    addiu $a2, $zero, \offset       # pass offset
+    jal artReadBarrierSlow          # artReadBarrierSlow(ref, rObj, offset)
+    addiu  $sp, $sp, -16            # Use branch delay slot to reserve argument slots on the stack
+                                    # before the call to artReadBarrierSlow.
+    addiu  $sp, $sp, 16             # restore stack after call to artReadBarrierSlow
+    # No need to unpoison the return value in v0; artReadBarrierSlow() does the unpoisoning.
+    move \rDest, $v0                # save return value in rDest
+                                    # (rDest cannot be v0 in art_quick_aput_obj)
+
+    lw     $a0, 0($sp)              # restore registers except rDest
+                                    # (rDest can only be t0 or t1 in art_quick_aput_obj)
+    .cfi_restore 4
+    lw     $a1, 4($sp)
+    .cfi_restore 5
+    lw     $a2, 8($sp)
+    .cfi_restore 6
+    POP_REG_NE $t0, 8, 16, \rDest
+    POP_REG_NE $t1, 9, 20, \rDest
+    lw     $t9, 24($sp)
+    .cfi_restore 25
+    lw     $ra, 28($sp)             # restore $ra
+    .cfi_restore 31
+    addiu  $sp, $sp, 32
+    .cfi_adjust_cfa_offset -32
+#else
+    lw     \rDest, \offset(\rObj)
+    UNPOISON_HEAP_REF \rDest
+#endif  // USE_READ_BARRIER
+.endm
+
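
The `addiu $sp, $sp, -16` parked in the jal's branch delay slot reserves the outgoing argument area that the MIPS o32 calling convention requires callers to provide even when the arguments travel in $a0-$a3; that ABI rule is an assumption of this note, not something the diff itself states. The arithmetic, restated:

    // o32 ABI assumption: callers reserve four 32-bit "home" slots for the
    // register arguments. The stub allocates them in the delay slot of the
    // jal and releases them immediately after the call returns.
    constexpr int kO32ArgSlotWords = 4;
    constexpr int kO32ArgSlotBytes = kO32ArgSlotWords * 4;
    static_assert(kO32ArgSlotBytes == 16, "matches the addiu $sp, $sp, -16 above");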
+    /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
      * a0 = array, a1 = index, a2 = value
@@ -648,15 +715,15 @@
     move $a1, $t0
 END art_quick_aput_obj_with_bound_check
 
+#ifdef USE_READ_BARRIER
+    .extern artReadBarrierSlow
+#endif
 ENTRY art_quick_aput_obj
     beqz $a2, .Ldo_aput_null
     nop
-    lw $t0, MIRROR_OBJECT_CLASS_OFFSET($a0)
-    UNPOISON_HEAP_REF $t0
-    lw $t1, MIRROR_OBJECT_CLASS_OFFSET($a2)
-    UNPOISON_HEAP_REF $t1
-    lw $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET($t0)
-    UNPOISON_HEAP_REF $t0
+    READ_BARRIER $t0, $a0, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t1, $a2, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t0, $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
     bne $t1, $t0, .Lcheck_assignability  # value's type == array's component type - trivial assignability
     nop
 .Ldo_aput:
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 4904af9..ec02d5a 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -186,6 +186,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index abca70b..c30e6ca 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -89,7 +89,6 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -132,7 +131,6 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ld      $t1, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -255,7 +253,6 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ld      $t1, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -888,6 +885,77 @@
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_check_cast
 
+
+    /*
+     * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
+     * nReg is the register number for rReg.
+     */
+.macro POP_REG_NE rReg, nReg, offset, rExclude
+    .ifnc \rReg, \rExclude
+        ld \rReg, \offset($sp)      # restore rReg
+        .cfi_restore \nReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert a read barrier, only used in art_quick_aput_obj.
+     * rObj and rDest are registers; offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER rDest, rObj, offset
+#ifdef USE_READ_BARRIER
+    # Save the registers used in art_quick_aput_obj (a0-a2, t0-t1, t9, ra); frame is 16B-aligned.
+    daddiu  $sp, $sp, -64
+    .cfi_adjust_cfa_offset 64
+    sd     $ra, 56($sp)
+    .cfi_rel_offset 31, 56
+    sd     $t9, 48($sp)
+    .cfi_rel_offset 25, 48
+    sd     $t1, 40($sp)
+    .cfi_rel_offset 13, 40
+    sd     $t0, 32($sp)
+    .cfi_rel_offset 12, 32
+    sd     $a2, 16($sp)             # padding slot at offset 24 (padding can be any slot in the 64B)
+    .cfi_rel_offset 6, 16
+    sd     $a1, 8($sp)
+    .cfi_rel_offset 5, 8
+    sd     $a0, 0($sp)
+    .cfi_rel_offset 4, 0
+
+    # move $a0, $a0                 # pass ref in a0 (no-op for now since parameter ref is unused)
+    .ifnc \rObj, $a1
+        move $a1, \rObj             # pass rObj
+    .endif
+    daddiu $a2, $zero, \offset      # pass offset
+    jal artReadBarrierSlow          # artReadBarrierSlow(ref, rObj, offset)
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
+                                    # t8 may be clobbered in artReadBarrierSlow.
+    # No need to unpoison the return value in v0; artReadBarrierSlow() does the unpoisoning.
+    move \rDest, $v0                # save return value in rDest
+                                    # (rDest cannot be v0 in art_quick_aput_obj)
+
+    ld     $a0, 0($sp)              # restore registers except rDest
+                                    # (rDest can only be t0 or t1 in art_quick_aput_obj)
+    .cfi_restore 4
+    ld     $a1, 8($sp)
+    .cfi_restore 5
+    ld     $a2, 16($sp)
+    .cfi_restore 6
+    POP_REG_NE $t0, 12, 32, \rDest
+    POP_REG_NE $t1, 13, 40, \rDest
+    ld     $t9, 48($sp)
+    .cfi_restore 25
+    ld     $ra, 56($sp)             # restore $ra
+    .cfi_restore 31
+    daddiu  $sp, $sp, 64
+    .cfi_adjust_cfa_offset -64
+    SETUP_GP                        # set up gp because we are not returning
+#else
+    lwu     \rDest, \offset(\rObj)
+    UNPOISON_HEAP_REF \rDest
+#endif  // USE_READ_BARRIER
+.endm
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -913,12 +981,9 @@
 ENTRY art_quick_aput_obj
     beq  $a2, $zero, .Ldo_aput_null
     nop
-    lwu $t0, MIRROR_OBJECT_CLASS_OFFSET($a0)
-    UNPOISON_HEAP_REF $t0
-    lwu $t1, MIRROR_OBJECT_CLASS_OFFSET($a2)
-    UNPOISON_HEAP_REF $t1
-    lwu $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET($t0)
-    UNPOISON_HEAP_REF $t0
+    READ_BARRIER $t0, $a0, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t1, $a2, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t0, $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
     bne $t1, $t0, .Lcheck_assignability  # value's type == array's component type - trivial assignability
     nop
 .Ldo_aput:
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0831c26..cf7db34 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1124,8 +1124,6 @@
 
 
 TEST_F(StubTest, APutObj) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
@@ -1258,8 +1256,6 @@
 }
 
 TEST_F(StubTest, AllocObject) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   // This will lead to OOM  error messages in the log.
@@ -1385,8 +1381,6 @@
 }
 
 TEST_F(StubTest, AllocObjectArray) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
@@ -1474,8 +1468,6 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
@@ -2152,8 +2144,6 @@
 }
 
 TEST_F(StubTest, Fields8) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2166,8 +2156,6 @@
 }
 
 TEST_F(StubTest, Fields16) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2180,8 +2168,6 @@
 }
 
 TEST_F(StubTest, Fields32) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2193,8 +2179,6 @@
 }
 
 TEST_F(StubTest, FieldsObj) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2206,8 +2190,6 @@
 }
 
 TEST_F(StubTest, Fields64) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2221,8 +2203,6 @@
 TEST_F(StubTest, IMT) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   ScopedObjectAccess soa(self);
@@ -2342,8 +2322,6 @@
 
 TEST_F(StubTest, StringIndexOf) {
 #if defined(__arm__) || defined(__aarch64__)
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
@@ -2416,4 +2394,40 @@
 #endif
 }
 
+TEST_F(StubTest, ReadBarrier) {
+#if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \
+      defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__)))
+  Thread* self = Thread::Current();
+
+  const uintptr_t readBarrierSlow = StubTest::GetEntrypoint(self, kQuickReadBarrierSlow);
+
+  // Create an object
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> c(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+
+  // Build an object instance
+  Handle<mirror::Object> obj(hs.NewHandle(c->AllocObject(soa.Self())));
+
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  size_t result = Invoke3(0U, reinterpret_cast<size_t>(obj.Get()),
+                          mirror::Object::ClassOffset().SizeValue(), readBarrierSlow, self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+  mirror::Class* klass = reinterpret_cast<mirror::Class*>(result);
+  EXPECT_EQ(klass, obj->GetClass());
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping read_barrier_slow";
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping read_barrier_slow" << std::endl;
+#endif
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 737f4d1..e2632c1 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -28,6 +28,9 @@
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      QuickEntryPoints* qpoints) {
   // Interpreter
@@ -141,6 +144,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index ebfb3fa..1da5a2f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -33,7 +33,6 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
@@ -60,7 +59,6 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
    // Push refs only callee-save method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
@@ -106,7 +104,6 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
    // Push refs and args callee-save method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
    // Store esp as the top quick frame.
@@ -1126,6 +1123,53 @@
     UNREACHABLE
 END_FUNCTION art_quick_check_cast
 
+// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
+MACRO2(POP_REG_NE, reg, exclude_reg)
+    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
+      addl MACRO_LITERAL(4), %esp
+      CFI_ADJUST_CFA_OFFSET(-4)
+    .else
+      POP RAW_VAR(reg)
+    .endif
+END_MACRO
+
+    /*
+     * Macro to insert a read barrier, only used in art_quick_aput_obj.
+     * obj_reg and dest_reg are registers; offset is a defined literal such as
+     * MIRROR_OBJECT_CLASS_OFFSET.
+     * pop_eax is a boolean flag indicating whether eax is popped after the call.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+MACRO4(READ_BARRIER, obj_reg, offset, dest_reg, pop_eax)
+#ifdef USE_READ_BARRIER
+    PUSH eax                        // save registers used in art_quick_aput_obj
+    PUSH ebx
+    PUSH edx
+    PUSH ecx
+    // Outgoing argument set up
+    pushl MACRO_LITERAL((RAW_VAR(offset)))  // pass offset, double parentheses are necessary
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH RAW_VAR(obj_reg)           // pass obj_reg
+    PUSH eax                        // pass ref, just pass eax for now since parameter ref is unused
+    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj_reg, offset)
+    // No need to unpoison the return value in eax; artReadBarrierSlow() does the unpoisoning.
+    .ifnc RAW_VAR(dest_reg), eax
+      movl %eax, REG_VAR(dest_reg)  // save loaded ref in dest_reg
+    .endif
+    addl MACRO_LITERAL(12), %esp    // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-12)
+    POP_REG_NE ecx, RAW_VAR(dest_reg) // Restore args except dest_reg
+    POP_REG_NE edx, RAW_VAR(dest_reg)
+    POP_REG_NE ebx, RAW_VAR(dest_reg)
+    .ifc RAW_VAR(pop_eax), true
+      POP_REG_NE eax, RAW_VAR(dest_reg)
+    .endif
+#else
+    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg)
+    UNPOISON_HEAP_REF RAW_VAR(dest_reg)
+#endif  // USE_READ_BARRIER
+END_MACRO
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -1149,17 +1193,20 @@
 DEFINE_FUNCTION art_quick_aput_obj
     test %edx, %edx              // store of null
     jz .Ldo_aput_null
-    movl MIRROR_OBJECT_CLASS_OFFSET(%eax), %ebx
-    UNPOISON_HEAP_REF ebx
-    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
-    UNPOISON_HEAP_REF ebx
+    READ_BARRIER eax, MIRROR_OBJECT_CLASS_OFFSET, ebx, true
+    READ_BARRIER ebx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ebx, true
     // value's type == array's component type - trivial assignability
-#ifdef USE_HEAP_POISONING
-    PUSH eax  // save eax
+#if defined(USE_READ_BARRIER)
+    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, false
+    cmpl %eax, %ebx
+    POP eax                      // restore eax pushed at the beginning of the READ_BARRIER macro
+#elif defined(USE_HEAP_POISONING)
+    PUSH eax                     // save eax
+    // Cannot call the READ_BARRIER macro here because the push above breaks the stack alignment.
     movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax
     UNPOISON_HEAP_REF eax
     cmpl %eax, %ebx
-    POP  eax  // restore eax
+    POP eax                      // restore eax
 #else
     cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ebx
 #endif
@@ -1181,6 +1228,8 @@
     subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
 #ifdef USE_HEAP_POISONING
+    // This load does not need a read barrier, since edx is unchanged and there is no GC safe
+    // point since the last read of MIRROR_OBJECT_CLASS_OFFSET(%edx).
     movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax  // pass arg2 - type of the value to be stored
     UNPOISON_HEAP_REF eax
     PUSH eax
@@ -1696,5 +1745,15 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_slow
+    PUSH edx                        // pass arg3 - offset
+    PUSH ecx                        // pass arg2 - obj
+    PUSH eax                        // pass arg1 - ref
+    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
+    addl LITERAL(12), %esp          // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-12)
+    ret
+END_FUNCTION art_quick_read_barrier_slow
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 706ae58..cf0039c 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -24,6 +24,7 @@
 #define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
 #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
 #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
+#define MACRO4(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4) .macro macro_name macro_arg1, macro_arg2, macro_arg3, macro_arg4
 #define END_MACRO .endm
 
 #if defined(__clang__)
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index d0ab9d5..ef1bb5f 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -29,6 +29,9 @@
 extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
                                                    const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
@@ -145,6 +148,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
 #endif  // __APPLE__
 };
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0eeb03a..f4c9488 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -66,7 +66,6 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
@@ -109,7 +108,6 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
@@ -168,7 +166,6 @@
     subq MACRO_LITERAL(80 + 4 * 8), %rsp
     CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Save FPRs.
     movq %xmm0, 16(%rsp)
@@ -920,8 +917,12 @@
     // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
+    // TODO: Add read barrier when this function is used.
+    // Might need a special macro since rsi and edx are 32b/64b mismatched.
     movl ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx  // Load dex cache resolved types array
     UNPOISON_HEAP_REF edx
+    // TODO: Add read barrier when this function is used.
+    // Might need to break this down into multiple instructions to get the base address in a register.
                                                                // Load the class
     movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdx, %rdi, MIRROR_OBJECT_ARRAY_COMPONENT_SIZE), %edx
     UNPOISON_HEAP_REF edx
@@ -1153,6 +1154,60 @@
 END_FUNCTION art_quick_check_cast
 
 
+// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
+MACRO2(POP_REG_NE, reg, exclude_reg)
+    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
+      addq MACRO_LITERAL(8), %rsp
+      CFI_ADJUST_CFA_OFFSET(-8)
+    .else
+      POP RAW_VAR(reg)
+    .endif
+END_MACRO
+
+    /*
+     * Macro to insert a read barrier, used in art_quick_aput_obj and art_quick_alloc_object_tlab.
+     * obj_reg and dest_reg{32|64} are registers; offset is a defined literal such as
+     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
+     * 64b PUSH/POP and 32b arguments.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     *
+     * As with the art_quick_aput_obj* functions, the 64b versions are in comments.
+     */
+MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
+#ifdef USE_READ_BARRIER
+    PUSH rax                            // save registers that might be used
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    PUSH rcx
+    SETUP_FP_CALLEE_SAVE_FRAME
+    // Outgoing argument set up
+    // movl %edi, %edi                  // pass ref, no-op for now since parameter ref is unused
+    // // movq %rdi, %rdi
+    movl REG_VAR(obj_reg), %esi         // pass obj_reg
+    // movq REG_VAR(obj_reg), %rsi
+    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
+    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
+    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
+    // No need to unpoison the return value in rax; artReadBarrierSlow() does the unpoisoning.
+    .ifnc RAW_VAR(dest_reg32), eax
+    // .ifnc RAW_VAR(dest_reg64), rax
+      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
+      // movq %rax, REG_VAR(dest_reg64)
+    .endif
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
+    POP_REG_NE rdx, RAW_VAR(dest_reg64)
+    POP_REG_NE rsi, RAW_VAR(dest_reg64)
+    POP_REG_NE rdi, RAW_VAR(dest_reg64)
+    POP_REG_NE rax, RAW_VAR(dest_reg64)
+#else
+    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
+    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
+    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
+#endif  // USE_READ_BARRIER
+END_MACRO
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -1197,15 +1252,13 @@
     testl %edx, %edx                // store of null
 //  test %rdx, %rdx
     jz .Ldo_aput_null
-    movl MIRROR_OBJECT_CLASS_OFFSET(%edi), %ecx
-//  movq MIRROR_OBJECT_CLASS_OFFSET(%rdi), %rcx
-    UNPOISON_HEAP_REF ecx
-    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
-//  movq MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
-    UNPOISON_HEAP_REF ecx
-#ifdef USE_HEAP_POISONING
-    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax  // rax is free.
-    UNPOISON_HEAP_REF eax
+    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
+    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
+    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
+    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
+#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
+    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
+    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
     cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
 #else
     cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
@@ -1232,9 +1285,14 @@
     PUSH rdx
     SETUP_FP_CALLEE_SAVE_FRAME
 
-                                  // "Uncompress" = do nothing, as already zero-extended on load.
-    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
-    UNPOISON_HEAP_REF esi
+#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
+    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant; eax still holds the value.
+    movl %eax, %esi               // Pass arg2 = value's class.
+    // movq %rax, %rsi
+#else
+                                     // "Uncompress" = do nothing, as already zero-extended on load.
+    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
+#endif
     movq %rcx, %rdi               // Pass arg1 = array's component type.
 
     call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
@@ -1735,3 +1793,14 @@
     call PLT_SYMBOL(longjmp)
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
+
+DEFINE_FUNCTION art_quick_read_barrier_slow
+    SETUP_FP_CALLEE_SAVE_FRAME
+    subq LITERAL(8), %rsp           // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_read_barrier_slow
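
The 8-byte padding in the stub above exists because the SysV x86-64 ABI (an assumption of this note) requires the stack pointer to be 16-byte aligned at a call instruction: entering the stub pushed an 8-byte return address, and SETUP_FP_CALLEE_SAVE_FRAME adjusts the stack by a multiple of 16, so one extra 8-byte bump restores alignment. The arithmetic, restated:

    // Return address (8) + padding (8) is a multiple of 16, so rsp is
    // 16B-aligned again at the call to artReadBarrierSlow.
    constexpr int kReturnAddressBytes = 8;
    constexpr int kPaddingBytes = 8;
    static_assert((kReturnAddressBytes + kPaddingBytes) % 16 == 0,
                  "stack realigned before calling out");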
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index f4f8eaf..350a0d4 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -109,7 +109,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 150 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 151 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
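
The 150 -> 151 bump mirrors the entrypoint addition: the quick entrypoints evidently sit in Thread::tlsPtr_ ahead of thread_local_pos (inferred from this diff, not stated in it), so one extra pointer-sized entrypoint shifts every later field by __SIZEOF_POINTER__. A toy layout reproducing just the arithmetic; the flattened field list is a stand-in, not Thread's real definition:

    #include <cstddef>
    #include <cstdint>

    // Toy: everything in tlsPtr_ before thread_local_pos collapses into one
    // array of 151 pointer-sized slots (150 before pReadBarrierSlow existed).
    struct TlsPtrToy {
      void* earlier_fields[151];
      uint8_t* thread_local_pos;
    };
    static_assert(offsetof(TlsPtrToy, thread_local_pos) == 151 * sizeof(void*),
                  "mirrors THREAD_LOCAL_POS_OFFSET = base + 151 * pointer size");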
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 40b3669..38bc818 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -16,6 +16,7 @@
 
 #include "check_jni.h"
 
+#include <iomanip>
 #include <sys/mman.h>
 #include <zlib.h>
 
@@ -1083,10 +1084,29 @@
     }
 
     const char* errorKind = nullptr;
-    uint8_t utf8 = CheckUtfBytes(bytes, &errorKind);
+    const uint8_t* utf8 = CheckUtfBytes(bytes, &errorKind);
     if (errorKind != nullptr) {
+      // This is an expensive loop that will resize often, but it isn't expected to be hit in
+      // practice anyway.
+      std::ostringstream oss;
+      oss << std::hex;
+      const uint8_t* tmp = reinterpret_cast<const uint8_t*>(bytes);
+      while (*tmp != 0) {
+        if (tmp == utf8) {
+          oss << "<";
+        }
+        oss << "0x" << std::setfill('0') << std::setw(2) << static_cast<uint32_t>(*tmp);
+        if (tmp == utf8) {
+          oss << '>';
+        }
+        tmp++;
+        if (*tmp != 0) {
+          oss << ' ';
+        }
+      }
+
       AbortF("input is not valid Modified UTF-8: illegal %s byte %#x\n"
-          "    string: '%s'", errorKind, utf8, bytes);
+          "    string: '%s'\n    input: '%s'", errorKind, *utf8, bytes, oss.str().c_str());
       return false;
     }
     return true;
@@ -1094,11 +1114,11 @@
 
   // Checks whether |bytes| is valid modified UTF-8. We also accept 4 byte UTF
   // sequences in place of encoded surrogate pairs.
-  static uint8_t CheckUtfBytes(const char* bytes, const char** errorKind) {
+  static const uint8_t* CheckUtfBytes(const char* bytes, const char** errorKind) {
     while (*bytes != '\0') {
-      uint8_t utf8 = *(bytes++);
+      const uint8_t* utf8 = reinterpret_cast<const uint8_t*>(bytes++);
       // Switch on the high four bits.
-      switch (utf8 >> 4) {
+      switch (*utf8 >> 4) {
       case 0x00:
       case 0x01:
       case 0x02:
@@ -1118,11 +1138,11 @@
         return utf8;
       case 0x0f:
         // Bit pattern 1111, which might be the start of a 4 byte sequence.
-        if ((utf8 & 0x08) == 0) {
+        if ((*utf8 & 0x08) == 0) {
           // Bit pattern 1111 0xxx, which is the start of a 4 byte sequence.
           // We consume one continuation byte here, and fall through to consume two more.
-          utf8 = *(bytes++);
-          if ((utf8 & 0xc0) != 0x80) {
+          utf8 = reinterpret_cast<const uint8_t*>(bytes++);
+          if ((*utf8 & 0xc0) != 0x80) {
             *errorKind = "continuation";
             return utf8;
           }
@@ -1135,8 +1155,8 @@
         FALLTHROUGH_INTENDED;
       case 0x0e:
         // Bit pattern 1110, so there are two additional bytes.
-        utf8 = *(bytes++);
-        if ((utf8 & 0xc0) != 0x80) {
+        utf8 = reinterpret_cast<const uint8_t*>(bytes++);
+        if ((*utf8 & 0xc0) != 0x80) {
           *errorKind = "continuation";
           return utf8;
         }
@@ -1146,8 +1166,8 @@
       case 0x0c:
       case 0x0d:
         // Bit pattern 110x, so there is one additional byte.
-        utf8 = *(bytes++);
-        if ((utf8 & 0xc0) != 0x80) {
+        utf8 = reinterpret_cast<const uint8_t*>(bytes++);
+        if ((*utf8 & 0xc0) != 0x80) {
           *errorKind = "continuation";
           return utf8;
         }
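
Returning a pointer rather than the byte value is what lets the caller bracket the exact offending byte, as the new diagnostic loop above does with the <0xNN> markers. A self-contained toy of just the two-byte (110xxxxx) case, using the same lead/continuation masks as the switch above; the real checker also handles the 3- and 4-byte shapes:

    #include <cstdint>

    // Scan for a 110xxxxx lead byte whose following byte is not a valid
    // 10xxxxxx continuation; return a pointer to the offending byte, or
    // nullptr if this (partial) check finds nothing wrong.
    static const uint8_t* FindBadTwoByteSequence(const char* bytes) {
      const uint8_t* p = reinterpret_cast<const uint8_t*>(bytes);
      while (*p != 0) {
        uint8_t high = *p >> 4;
        if (high == 0x0c || high == 0x0d) {  // lead byte 110xxxxx
          ++p;
          if ((*p & 0xc0) != 0x80) {
            return p;  // bad (or missing) continuation byte
          }
        }
        ++p;
      }
      return nullptr;
    }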
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 6a76bf7..ce54c14 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1236,11 +1236,8 @@
 
 bool ClassLinker::ClassInClassTable(mirror::Class* klass) {
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  auto it = class_table_.Find(GcRoot<mirror::Class>(klass));
-  if (it == class_table_.end()) {
-    return false;
-  }
-  return it->Read() == klass;
+  ClassTable* const class_table = ClassTableForClassLoader(klass->GetClassLoader());
+  return class_table != nullptr && class_table->Contains(klass);
 }
 
 void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) {
@@ -1263,26 +1260,30 @@
     // Moving concurrent:
     // Need to make sure to not copy ArtMethods without doing read barriers since the roots are
     // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy.
-    for (GcRoot<mirror::Class>& root : class_table_) {
-      buffered_visitor.VisitRoot(root);
+    std::vector<std::pair<GcRoot<mirror::ClassLoader>, ClassTable*>> reinsert;
+    for (auto it = classes_.begin(); it != classes_.end(); ) {
+      it->second->VisitRoots(visitor, flags);
+      const GcRoot<mirror::ClassLoader>& root = it->first;
+      mirror::ClassLoader* old_ref = root.Read<kWithoutReadBarrier>();
+      root.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+      mirror::ClassLoader* new_ref = root.Read<kWithoutReadBarrier>();
+      if (new_ref != old_ref) {
+        reinsert.push_back(*it);
+        it = classes_.erase(it);
+      } else {
+        ++it;
+      }
     }
-    // PreZygote classes can't move so we won't need to update fields' declaring classes.
-    for (GcRoot<mirror::Class>& root : pre_zygote_class_table_) {
-      buffered_visitor.VisitRoot(root);
+    for (auto& pair : reinsert) {
+      classes_.Put(pair.first, pair.second);
     }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
       root.VisitRoot(visitor, RootInfo(kRootStickyClass));
       mirror::Class* new_ref = root.Read<kWithoutReadBarrier>();
-      if (UNLIKELY(new_ref != old_ref)) {
-        // Uh ohes, GC moved a root in the log. Need to search the class_table and update the
-        // corresponding object. This is slow, but luckily for us, this may only happen with a
-        // concurrent moving GC.
-        auto it = class_table_.Find(GcRoot<mirror::Class>(old_ref));
-        DCHECK(it != class_table_.end());
-        *it = GcRoot<mirror::Class>(new_ref);
-      }
+      // Concurrent moving GC marked new roots through the to-space invariant.
+      CHECK_EQ(new_ref, old_ref);
     }
   }
   buffered_visitor.Flush();  // Flush before clearing new_class_roots_.
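
The erase-and-reinsert loop above exists because classes_ is keyed by the class loader root: if the visitor relocates the loader, the key's hash changes, and an entry in a hashed container must be pulled out before its key mutates and re-added afterwards. The same pattern in generic, self-contained form (the container and names are illustrative, not ART's):

    #include <unordered_map>
    #include <utility>
    #include <vector>

    // Visit every key; if visiting yields an updated key (e.g. a GC moved the
    // object it refers to), remove the entry before the key changes and
    // re-insert it afterwards so bucket indices stay consistent.
    template <typename K, typename V, typename Visit>
    void VisitAndRehashKeys(std::unordered_map<K, V>& map, Visit visit) {
      std::vector<std::pair<K, V>> reinsert;
      for (auto it = map.begin(); it != map.end();) {
        K updated = visit(it->first);
        if (updated != it->first) {
          reinsert.emplace_back(std::move(updated), std::move(it->second));
          it = map.erase(it);
        } else {
          ++it;
        }
      }
      for (auto& entry : reinsert) {
        map.insert(std::move(entry));
      }
    }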
@@ -1331,21 +1332,27 @@
   }
 }
 
+void ClassLinker::VisitClassesInternal(ClassVisitor* visitor, void* arg) {
+  for (auto& pair : classes_) {
+    ClassTable* const class_table = pair.second;
+    if (!class_table->Visit(visitor, arg)) {
+      return;
+    }
+  }
+}
+
 void ClassLinker::VisitClasses(ClassVisitor* visitor, void* arg) {
   if (dex_cache_image_class_lookup_required_) {
     MoveImageClassesToClassTable();
   }
-  // TODO: why isn't this a ReaderMutexLock?
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  for (GcRoot<mirror::Class>& root : class_table_) {
-    if (!visitor(root.Read(), arg)) {
-      return;
-    }
-  }
-  for (GcRoot<mirror::Class>& root : pre_zygote_class_table_) {
-    if (!visitor(root.Read(), arg)) {
-      return;
-    }
+  Thread* const self = Thread::Current();
+  ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  // It is not safe to allow thread suspension while we are holding a lock.
+  if (self != nullptr) {
+    ScopedAssertNoThreadSuspension nts(self, __FUNCTION__);
+    VisitClassesInternal(visitor, arg);
+  } else {
+    VisitClassesInternal(visitor, arg);
   }
 }
 
@@ -1399,7 +1406,7 @@
       size_t class_table_size;
       {
         ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-        class_table_size = class_table_.Size() + pre_zygote_class_table_.Size();
+        class_table_size = NumZygoteClasses() + NumNonZygoteClasses();
       }
       mirror::Class* class_type = mirror::Class::GetJavaLangClass();
       mirror::Class* array_of_class = FindArrayClass(self, &class_type);
@@ -1443,6 +1450,7 @@
   mirror::LongArray::ResetArrayClass();
   mirror::ShortArray::ResetArrayClass();
   STLDeleteElements(&oat_files_);
+  STLDeleteValues(&classes_);
 }
 
 mirror::PointerArray* ClassLinker::AllocPointerArray(Thread* self, size_t length) {
@@ -2458,8 +2466,8 @@
 
 bool ClassLinker::IsDexFileRegisteredLocked(const DexFile& dex_file) {
   dex_lock_.AssertSharedHeld(Thread::Current());
-  for (size_t i = 0; i != dex_caches_.size(); ++i) {
-    mirror::DexCache* dex_cache = GetDexCache(i);
+  for (GcRoot<mirror::DexCache>& root : dex_caches_) {
+    mirror::DexCache* dex_cache = root.Read();
     if (dex_cache->GetDexFile() == &dex_file) {
       return true;
     }
@@ -2757,8 +2765,7 @@
   return nullptr;
 }
 
-mirror::Class* ClassLinker::InsertClass(const char* descriptor, mirror::Class* klass,
-                                        size_t hash) {
+mirror::Class* ClassLinker::InsertClass(const char* descriptor, mirror::Class* klass, size_t hash) {
   if (VLOG_IS_ON(class_linker)) {
     mirror::DexCache* dex_cache = klass->GetDexCache();
     std::string source;
@@ -2769,11 +2776,13 @@
     LOG(INFO) << "Loaded class " << descriptor << source;
   }
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  mirror::Class* existing = LookupClassFromTableLocked(descriptor, klass->GetClassLoader(), hash);
+  mirror::ClassLoader* const class_loader = klass->GetClassLoader();
+  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+  mirror::Class* existing = class_table->Lookup(descriptor, hash);
   if (existing != nullptr) {
     return existing;
   }
-  if (kIsDebugBuild && !klass->IsTemp() && klass->GetClassLoader() == nullptr &&
+  if (kIsDebugBuild && !klass->IsTemp() && class_loader == nullptr &&
       dex_cache_image_class_lookup_required_) {
     // Check a class loaded with the system class loader matches one in the image if the class
     // is in the image.
@@ -2783,7 +2792,7 @@
     }
   }
   VerifyObject(klass);
-  class_table_.InsertWithHash(GcRoot<mirror::Class>(klass), hash);
+  class_table->InsertWithHash(klass, hash);
   if (log_new_class_table_roots_) {
     new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
   }
@@ -2802,95 +2811,41 @@
   }
 }
 
-mirror::Class* ClassLinker::UpdateClass(const char* descriptor, mirror::Class* klass,
-                                        size_t hash) {
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  auto existing_it = class_table_.FindWithHash(std::make_pair(descriptor, klass->GetClassLoader()),
-                                               hash);
-  CHECK(existing_it != class_table_.end());
-  mirror::Class* existing = existing_it->Read();
-  CHECK_NE(existing, klass) << descriptor;
-  CHECK(!existing->IsResolved()) << descriptor;
-  CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusResolving) << descriptor;
-
-  CHECK(!klass->IsTemp()) << descriptor;
-  if (kIsDebugBuild && klass->GetClassLoader() == nullptr &&
-      dex_cache_image_class_lookup_required_) {
-    // Check a class loaded with the system class loader matches one in the image if the class
-    // is in the image.
-    existing = LookupClassFromImage(descriptor);
-    if (existing != nullptr) {
-      CHECK_EQ(klass, existing) << descriptor;
-    }
-  }
-  VerifyObject(klass);
-
-  // Update the element in the hash set.
-  *existing_it = GcRoot<mirror::Class>(klass);
-  if (log_new_class_table_roots_) {
-    new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
-  }
-
-  return existing;
-}
-
 bool ClassLinker::RemoveClass(const char* descriptor, mirror::ClassLoader* class_loader) {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  auto pair = std::make_pair(descriptor, class_loader);
-  auto it = class_table_.Find(pair);
-  if (it != class_table_.end()) {
-    class_table_.Erase(it);
-    return true;
-  }
-  it = pre_zygote_class_table_.Find(pair);
-  if (it != pre_zygote_class_table_.end()) {
-    pre_zygote_class_table_.Erase(it);
-    return true;
-  }
-  return false;
+  ClassTable* const class_table = ClassTableForClassLoader(class_loader);
+  return class_table != nullptr && class_table->Remove(descriptor);
 }
 
 mirror::Class* ClassLinker::LookupClass(Thread* self, const char* descriptor, size_t hash,
                                         mirror::ClassLoader* class_loader) {
   {
     ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    mirror::Class* result = LookupClassFromTableLocked(descriptor, class_loader, hash);
-    if (result != nullptr) {
-      return result;
+    ClassTable* const class_table = ClassTableForClassLoader(class_loader);
+    if (class_table != nullptr) {
+      mirror::Class* result = class_table->Lookup(descriptor, hash);
+      if (result != nullptr) {
+        return result;
+      }
     }
   }
   if (class_loader != nullptr || !dex_cache_image_class_lookup_required_) {
     return nullptr;
+  }
+  // Lookup failed but we need to search dex_caches_.
+  mirror::Class* result = LookupClassFromImage(descriptor);
+  if (result != nullptr) {
+    result = InsertClass(descriptor, result, hash);
   } else {
-    // Lookup failed but need to search dex_caches_.
-    mirror::Class* result = LookupClassFromImage(descriptor);
-    if (result != nullptr) {
-      InsertClass(descriptor, result, hash);
-    } else {
-      // Searching the image dex files/caches failed, we don't want to get into this situation
-      // often as map searches are faster, so after kMaxFailedDexCacheLookups move all image
-      // classes into the class table.
-      constexpr uint32_t kMaxFailedDexCacheLookups = 1000;
-      if (++failed_dex_cache_class_lookups_ > kMaxFailedDexCacheLookups) {
-        MoveImageClassesToClassTable();
-      }
-    }
-    return result;
-  }
-}
-
-mirror::Class* ClassLinker::LookupClassFromTableLocked(const char* descriptor,
-                                                       mirror::ClassLoader* class_loader,
-                                                       size_t hash) {
-  auto descriptor_pair = std::make_pair(descriptor, class_loader);
-  auto it = pre_zygote_class_table_.FindWithHash(descriptor_pair, hash);
-  if (it == pre_zygote_class_table_.end()) {
-    it = class_table_.FindWithHash(descriptor_pair, hash);
-    if (it == class_table_.end()) {
-      return nullptr;
+    // Searching the image dex files/caches failed; we don't want to get into this situation
+    // often, as map searches are faster, so after kMaxFailedDexCacheLookups move all image
+    // classes into the class table.
+    constexpr uint32_t kMaxFailedDexCacheLookups = 1000;
+    if (++failed_dex_cache_class_lookups_ > kMaxFailedDexCacheLookups) {
+      MoveImageClassesToClassTable();
     }
   }
-  return it->Read();
+  return result;
 }
 
 static mirror::ObjectArray<mirror::DexCache>* GetImageDexCaches()
@@ -2910,6 +2865,7 @@
   ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
   std::string temp;
+  ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     mirror::ObjectArray<mirror::Class>* types = dex_cache->GetResolvedTypes();
@@ -2919,12 +2875,12 @@
         DCHECK(klass->GetClassLoader() == nullptr);
         const char* descriptor = klass->GetDescriptor(&temp);
         size_t hash = ComputeModifiedUtf8Hash(descriptor);
-        mirror::Class* existing = LookupClassFromTableLocked(descriptor, nullptr, hash);
+        mirror::Class* existing = class_table->Lookup(descriptor, hash);
         if (existing != nullptr) {
           CHECK_EQ(existing, klass) << PrettyClassAndClassLoader(existing) << " != "
               << PrettyClassAndClassLoader(klass);
         } else {
-          class_table_.Insert(GcRoot<mirror::Class>(klass));
+          class_table->Insert(klass);
           if (log_new_class_table_roots_) {
             new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
           }
@@ -2937,9 +2893,9 @@
 
 void ClassLinker::MoveClassTableToPreZygote() {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  DCHECK(pre_zygote_class_table_.Empty());
-  pre_zygote_class_table_ = std::move(class_table_);
-  class_table_.Clear();
+  for (auto& class_table : classes_) {
+    class_table.second->FreezeSnapshot();
+  }
 }
 
 mirror::Class* ClassLinker::LookupClassFromImage(const char* descriptor) {
@@ -2971,31 +2927,13 @@
     MoveImageClassesToClassTable();
   }
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  while (true) {
-    auto it = class_table_.Find(descriptor);
-    if (it == class_table_.end()) {
-      break;
+  for (auto& pair : classes_) {
+    // There can only be one class with the same descriptor per class loader.
+    ClassTable* const class_table = pair.second;
+    mirror::Class* klass = class_table->Lookup(descriptor, ComputeModifiedUtf8Hash(descriptor));
+    if (klass != nullptr) {
+      result.push_back(klass);
     }
-    result.push_back(it->Read());
-    class_table_.Erase(it);
-  }
-  for (mirror::Class* k : result) {
-    class_table_.Insert(GcRoot<mirror::Class>(k));
-  }
-  size_t pre_zygote_start = result.size();
-  // Now handle the pre zygote table.
-  // Note: This dirties the pre-zygote table but shouldn't be an issue since LookupClasses is only
-  // called from the debugger.
-  while (true) {
-    auto it = pre_zygote_class_table_.Find(descriptor);
-    if (it == pre_zygote_class_table_.end()) {
-      break;
-    }
-    result.push_back(it->Read());
-    pre_zygote_class_table_.Erase(it);
-  }
-  for (size_t i = pre_zygote_start; i < result.size(); ++i) {
-    pre_zygote_class_table_.Insert(GcRoot<mirror::Class>(result[i]));
   }
 }
 
@@ -3303,7 +3241,7 @@
   klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
   mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
   std::string descriptor(GetDescriptorForProxy(klass.Get()));
-  size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
+  const size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
 
   // Insert the class before loading the fields as the field roots
   // (ArtField::declaring_class_) are only visited from the class
@@ -4046,6 +3984,25 @@
   }
 }
 
+ClassTable* ClassLinker::InsertClassTableForClassLoader(mirror::ClassLoader* class_loader) {
+  auto it = classes_.find(GcRoot<mirror::ClassLoader>(class_loader));
+  if (it != classes_.end()) {
+    return it->second;
+  }
+  // No class table for this loader yet; create one and add it to the map.
+  auto* const class_table = new ClassTable;
+  classes_.Put(GcRoot<mirror::ClassLoader>(class_loader), class_table);
+  return class_table;
+}
+
+ClassTable* ClassLinker::ClassTableForClassLoader(mirror::ClassLoader* class_loader) {
+  auto it = classes_.find(GcRoot<mirror::ClassLoader>(class_loader));
+  if (it != classes_.end()) {
+    return it->second;
+  }
+  return nullptr;
+}
+
 bool ClassLinker::LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
                             Handle<mirror::ObjectArray<mirror::Class>> interfaces,
                             MutableHandle<mirror::Class>* h_new_class_out) {
@@ -4096,9 +4053,26 @@
     CHECK_EQ(h_new_class->GetClassSize(), class_size);
     ObjectLock<mirror::Class> lock(self, h_new_class);
     FixupTemporaryDeclaringClass(klass.Get(), h_new_class.Get());
-    mirror::Class* existing = UpdateClass(descriptor, h_new_class.Get(),
-                                          ComputeModifiedUtf8Hash(descriptor));
-    CHECK(existing == nullptr || existing == klass.Get());
+
+    {
+      WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+      mirror::ClassLoader* const class_loader = h_new_class.Get()->GetClassLoader();
+      ClassTable* const table = InsertClassTableForClassLoader(class_loader);
+      mirror::Class* existing = table->UpdateClass(descriptor, h_new_class.Get(),
+                                                   ComputeModifiedUtf8Hash(descriptor));
+      CHECK_EQ(existing, klass.Get());
+      if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) {
+        // Check a class loaded with the system class loader matches one in the image if the class
+        // is in the image.
+        mirror::Class* const image_class = LookupClassFromImage(descriptor);
+        if (image_class != nullptr) {
+          CHECK_EQ(klass.Get(), image_class) << descriptor;
+        }
+      }
+      if (log_new_class_table_roots_) {
+        new_class_roots_.push_back(GcRoot<mirror::Class>(h_new_class.Get()));
+      }
+    }
 
     // This will notify waiters on temp class that saw the not yet resolved class in the
     // class_table_ during EnsureResolved.
@@ -5589,23 +5563,13 @@
   return dex_file.GetMethodShorty(method_id, length);
 }
 
-void ClassLinker::DumpAllClasses(int flags) {
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
-  }
-  // TODO: at the time this was written, it wasn't safe to call PrettyField with the ClassLinker
-  // lock held, because it might need to resolve a field's type, which would try to take the lock.
-  std::vector<mirror::Class*> all_classes;
-  {
-    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    for (GcRoot<mirror::Class>& it : class_table_) {
-      all_classes.push_back(it.Read());
-    }
-  }
+bool DumpClassVisitor(mirror::Class* klass, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+  klass->DumpClass(LOG(ERROR), reinterpret_cast<ssize_t>(arg));
+  return true;
+}
 
-  for (size_t i = 0; i < all_classes.size(); ++i) {
-    all_classes[i]->DumpClass(std::cerr, flags);
-  }
+void ClassLinker::DumpAllClasses(int flags) {
+  VisitClasses(&DumpClassVisitor, reinterpret_cast<void*>(flags));
 }
 
 static OatFile::OatMethod CreateOatMethod(const void* code) {
@@ -5658,8 +5622,24 @@
     MoveImageClassesToClassTable();
   }
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  os << "Zygote loaded classes=" << pre_zygote_class_table_.Size() << " post zygote classes="
-     << class_table_.Size() << "\n";
+  os << "Zygote loaded classes=" << NumZygoteClasses() << " post zygote classes="
+     << NumNonZygoteClasses() << "\n";
+}
+
+size_t ClassLinker::NumZygoteClasses() const {
+  size_t sum = 0;
+  for (auto& pair : classes_) {
+    sum += pair.second->NumZygoteClasses();
+  }
+  return sum;
+}
+
+size_t ClassLinker::NumNonZygoteClasses() const {
+  size_t sum = 0;
+  for (auto& pair : classes_) {
+    sum += pair.second->NumNonZygoteClasses();
+  }
+  return sum;
 }
 
 size_t ClassLinker::NumLoadedClasses() {
@@ -5668,7 +5648,7 @@
   }
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
    // Only return non zygote classes since these are the ones which apps care about.
-  return class_table_.Size();
+  return NumNonZygoteClasses();
 }
 
 pid_t ClassLinker::GetClassesLockOwner() {
@@ -5739,43 +5719,6 @@
   return descriptor;
 }
 
-std::size_t ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
-    const {
-  std::string temp;
-  return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
-}
-
-bool ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                        const GcRoot<mirror::Class>& b) const {
-  if (a.Read()->GetClassLoader() != b.Read()->GetClassLoader()) {
-    return false;
-  }
-  std::string temp;
-  return a.Read()->DescriptorEquals(b.Read()->GetDescriptor(&temp));
-}
-
-std::size_t ClassLinker::ClassDescriptorHashEquals::operator()(
-    const std::pair<const char*, mirror::ClassLoader*>& element) const {
-  return ComputeModifiedUtf8Hash(element.first);
-}
-
-bool ClassLinker::ClassDescriptorHashEquals::operator()(
-    const GcRoot<mirror::Class>& a, const std::pair<const char*, mirror::ClassLoader*>& b) const {
-  if (a.Read()->GetClassLoader() != b.second) {
-    return false;
-  }
-  return a.Read()->DescriptorEquals(b.first);
-}
-
-bool ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                        const char* descriptor) const {
-  return a.Read()->DescriptorEquals(descriptor);
-}
-
-std::size_t ClassLinker::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
-  return ComputeModifiedUtf8Hash(descriptor);
-}
-
 bool ClassLinker::MayBeCalledWithDirectCodePointer(ArtMethod* m) {
   if (Runtime::Current()->UseJit()) {
     // JIT can have direct code pointers from any method to any other method.
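
The class_linker.cc changes above replace the flat class_table_/pre_zygote_class_table_ pair with one ClassTable per class loader, reached through InsertClassTableForClassLoader and ClassTableForClassLoader. A minimal sketch of that lookup-or-create map pattern, using hypothetical stand-in types (the real code keys a SafeMap on GcRoot<mirror::ClassLoader> and guards it with classlinker_classes_lock_):

#include <map>
#include <memory>

// Hypothetical stand-ins for mirror::ClassLoader and the real ClassTable.
struct ClassLoader {};
struct ClassTable {};

class ClassLinkerSketch {
 public:
  // Mirrors InsertClassTableForClassLoader(): return the loader's table,
  // creating it on first use.
  ClassTable* InsertClassTableForClassLoader(ClassLoader* loader) {
    auto it = classes_.find(loader);
    if (it != classes_.end()) {
      return it->second.get();
    }
    auto table = std::make_unique<ClassTable>();
    ClassTable* raw = table.get();
    classes_.emplace(loader, std::move(table));
    return raw;
  }

  // Mirrors ClassTableForClassLoader(): lookup only, may return nullptr.
  ClassTable* ClassTableForClassLoader(ClassLoader* loader) {
    auto it = classes_.find(loader);
    return it != classes_.end() ? it->second.get() : nullptr;
  }

 private:
  std::map<ClassLoader*, std::unique_ptr<ClassTable>> classes_;
};

Keying on the loader keeps each table small and sets up the per-loader lock sharding mentioned in the TODO in class_table.h below.
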
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 05a809e..1337563 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -25,6 +25,7 @@
 #include "base/hash_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "class_table.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "jni.h"
@@ -56,8 +57,6 @@
 class ScopedObjectAccessAlreadyRunnable;
 template<size_t kNumReferences> class PACKED(4) StackHandleScope;
 
-typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
-
 enum VisitRootFlags : uint8_t;
 
 class ClassLinker {
@@ -476,9 +475,28 @@
   void DropFindArrayClassCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
+  class CompareClassLoaderGcRoot {
+   public:
+    bool operator()(const GcRoot<mirror::ClassLoader>& a, const GcRoot<mirror::ClassLoader>& b)
+        const SHARED_REQUIRES(Locks::mutator_lock_) {
+      return a.Read() < b.Read();
+    }
+  };
+
+  typedef SafeMap<GcRoot<mirror::ClassLoader>, ClassTable*, CompareClassLoaderGcRoot>
+      ClassLoaderClassTable;
+
+  void VisitClassesInternal(ClassVisitor* visitor, void* arg)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of zygote and image classes.
+  size_t NumZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
+  // Returns the number of classes that are neither zygote nor image classes.
+  size_t NumNonZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
   OatFile& GetImageOatFile(gc::space::ImageSpace* space)
-      REQUIRES(!dex_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FinishInit(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
@@ -543,8 +561,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
-      REQUIRES(dex_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsDexFileRegisteredLocked(const DexFile& dex_file)
       SHARED_REQUIRES(dex_lock_, Locks::mutator_lock_);
 
@@ -568,7 +585,7 @@
   bool LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
                  Handle<mirror::ObjectArray<mirror::Class>> interfaces,
                  MutableHandle<mirror::Class>* h_new_class_out)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::classlinker_classes_lock_);
 
   bool LinkSuperClass(Handle<mirror::Class> klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -576,7 +593,8 @@
   bool LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
 
-  bool LinkMethods(Thread* self, Handle<mirror::Class> klass,
+  bool LinkMethods(Thread* self,
+                   Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces,
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -632,18 +650,16 @@
   void EnsurePreverifiedMethods(Handle<mirror::Class> c)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::Class* LookupClassFromTableLocked(const char* descriptor,
-                                            mirror::ClassLoader* class_loader,
-                                            size_t hash)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
-
-  mirror::Class* UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash)
-      REQUIRES(!Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   mirror::Class* LookupClassFromImage(const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns null if not found.
+  ClassTable* ClassTableForClassLoader(mirror::ClassLoader* class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_);
+  // Insert a new class table if not found.
+  ClassTable* InsertClassTableForClassLoader(mirror::ClassLoader* class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::classlinker_classes_lock_);
+
   // EnsureResolved is called to make sure that a class in the class_table_ has been resolved
+  // before returning it to the caller. It's the responsibility of the thread that placed the class
   // in the table to make it resolved. The thread doing resolution must notify on the class' lock
@@ -690,43 +706,11 @@
   std::vector<GcRoot<mirror::DexCache>> dex_caches_ GUARDED_BY(dex_lock_);
   std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
 
-  class ClassDescriptorHashEquals {
-   public:
-    // Same class loader and descriptor.
-    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
-    bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    // Same class loader and descriptor.
-    std::size_t operator()(const std::pair<const char*, mirror::ClassLoader*>& element) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    bool operator()(const GcRoot<mirror::Class>& a,
-                    const std::pair<const char*, mirror::ClassLoader*>& b) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    // Same descriptor.
-    bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
-  };
-  class GcRootEmptyFn {
-   public:
-    void MakeEmpty(GcRoot<mirror::Class>& item) const {
-      item = GcRoot<mirror::Class>();
-    }
-    bool IsEmpty(const GcRoot<mirror::Class>& item) const {
-      return item.IsNull();
-    }
-  };
+  // This contains strong roots. To enable concurrent root scanning of the class tables, be
+  // careful to use a read barrier when accessing this.
+  ClassLoaderClassTable classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
-  // hash set which hashes class descriptor, and compares descriptors nad class loaders. Results
-  // should be compared for a matching Class descriptor and class loader.
-  typedef HashSet<GcRoot<mirror::Class>, GcRootEmptyFn, ClassDescriptorHashEquals,
-      ClassDescriptorHashEquals, TrackingAllocator<GcRoot<mirror::Class>, kAllocatorTagClassTable>>
-      Table;
-  // This contains strong roots. To enable concurrent root scanning of
-  // the class table, be careful to use a read barrier when accessing this.
-  Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
-  Table pre_zygote_class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
-  std::vector<GcRoot<mirror::Class>> new_class_roots_;
+  // New class roots, only used by CMS since the GC needs to mark these in the pause.
+  std::vector<GcRoot<mirror::Class>> new_class_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
   // Do we need to search dex caches to find image classes?
   bool dex_cache_image_class_lookup_required_;
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
new file mode 100644
index 0000000..f775235
--- /dev/null
+++ b/runtime/class_table.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_table.h"
+
+#include "mirror/class-inl.h"
+
+namespace art {
+
+ClassTable::ClassTable() {
+  classes_.push_back(ClassSet());
+}
+
+void ClassTable::FreezeSnapshot() {
+  classes_.push_back(ClassSet());
+}
+
+bool ClassTable::Contains(mirror::Class* klass) {
+  for (ClassSet& class_set : classes_) {
+    auto it = class_set.Find(GcRoot<mirror::Class>(klass));
+    if (it != class_set.end()) {
+      return it->Read() == klass;
+    }
+  }
+  return false;
+}
+
+mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
+  // Should only be updating latest table.
+  auto existing_it = classes_.back().FindWithHash(descriptor, hash);
+  if (kIsDebugBuild && existing_it == classes_.back().end()) {
+    for (const ClassSet& class_set : classes_) {
+      if (class_set.FindWithHash(descriptor, hash) != class_set.end()) {
+        LOG(FATAL) << "Updating class found in frozen table " << descriptor;
+      }
+    }
+    LOG(FATAL) << "Updating class not found " << descriptor;
+  }
+  mirror::Class* const existing = existing_it->Read();
+  CHECK_NE(existing, klass) << descriptor;
+  CHECK(!existing->IsResolved()) << descriptor;
+  CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusResolving) << descriptor;
+  CHECK(!klass->IsTemp()) << descriptor;
+  VerifyObject(klass);
+  // Update the element in the hash set with the new class. This is safe to do since the descriptor
+  // doesn't change.
+  *existing_it = GcRoot<mirror::Class>(klass);
+  return existing;
+}
+
+void ClassTable::VisitRoots(RootVisitor* visitor, VisitRootFlags flags ATTRIBUTE_UNUSED) {
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
+      visitor, RootInfo(kRootStickyClass));
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      buffered_visitor.VisitRoot(root);
+    }
+  }
+}
+
+bool ClassTable::Visit(ClassVisitor* visitor, void* arg) {
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      if (!visitor(root.Read(), arg)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+size_t ClassTable::NumZygoteClasses() const {
+  size_t sum = 0;
+  for (size_t i = 0; i < classes_.size() - 1; ++i) {
+    sum += classes_[i].Size();
+  }
+  return sum;
+}
+
+size_t ClassTable::NumNonZygoteClasses() const {
+  return classes_.back().Size();
+}
+
+mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
+  for (ClassSet& class_set : classes_) {
+    auto it = class_set.FindWithHash(descriptor, hash);
+    if (it != class_set.end()) {
+      return it->Read();
+    }
+  }
+  return nullptr;
+}
+
+void ClassTable::Insert(mirror::Class* klass) {
+  classes_.back().Insert(GcRoot<mirror::Class>(klass));
+}
+
+void ClassTable::InsertWithHash(mirror::Class* klass, size_t hash) {
+  classes_.back().InsertWithHash(GcRoot<mirror::Class>(klass), hash);
+}
+
+bool ClassTable::Remove(const char* descriptor) {
+  for (ClassSet& class_set : classes_) {
+    auto it = class_set.Find(descriptor);
+    if (it != class_set.end()) {
+      class_set.Erase(it);
+      return true;
+    }
+  }
+  return false;
+}
+
+std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
+    const {
+  std::string temp;
+  return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
+}
+
+bool ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
+                                                       const GcRoot<mirror::Class>& b) const {
+  DCHECK_EQ(a.Read()->GetClassLoader(), b.Read()->GetClassLoader());
+  std::string temp;
+  return a.Read()->DescriptorEquals(b.Read()->GetDescriptor(&temp));
+}
+
+bool ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
+                                                       const char* descriptor) const {
+  return a.Read()->DescriptorEquals(descriptor);
+}
+
+std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
+  return ComputeModifiedUtf8Hash(descriptor);
+}
+
+}  // namespace art
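
The snapshot scheme in class_table.cc is easiest to see in isolation: Insert and InsertWithHash only touch the back ClassSet, FreezeSnapshot pushes a fresh set so earlier sets are never written again, and Lookup scans every set. A toy model, using a plain std::unordered_set keyed by descriptor instead of the GcRoot hash set:

#include <cassert>
#include <cstddef>
#include <string>
#include <unordered_set>
#include <vector>

// Toy model of ClassTable's snapshot vector, keyed by descriptor only.
class SnapshotTable {
 public:
  SnapshotTable() { sets_.emplace_back(); }

  // Like FreezeSnapshot(): older sets become read-only by convention.
  void FreezeSnapshot() { sets_.emplace_back(); }

  void Insert(const std::string& descriptor) {
    sets_.back().insert(descriptor);  // Only the newest set is mutated.
  }

  bool Lookup(const std::string& descriptor) const {
    for (const auto& set : sets_) {  // Lookups scan every snapshot.
      if (set.count(descriptor) != 0) {
        return true;
      }
    }
    return false;
  }

  // Analogous to NumZygoteClasses(): everything except the newest set.
  std::size_t NumFrozen() const {
    std::size_t sum = 0;
    for (std::size_t i = 0; i + 1 < sets_.size(); ++i) {
      sum += sets_[i].size();
    }
    return sum;
  }

  // Analogous to NumNonZygoteClasses(): just the newest set.
  std::size_t NumLive() const { return sets_.back().size(); }

 private:
  std::vector<std::unordered_set<std::string>> sets_;
};

int main() {
  SnapshotTable table;
  table.Insert("Ljava/lang/Object;");
  table.FreezeSnapshot();            // e.g. at zygote fork
  table.Insert("Lcom/example/App;");
  assert(table.Lookup("Ljava/lang/Object;"));
  assert(table.NumFrozen() == 1 && table.NumLive() == 1);
}
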
diff --git a/runtime/class_table.h b/runtime/class_table.h
new file mode 100644
index 0000000..af25131
--- /dev/null
+++ b/runtime/class_table.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CLASS_TABLE_H_
+#define ART_RUNTIME_CLASS_TABLE_H_
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/allocator.h"
+#include "base/hash_set.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "gc_root.h"
+#include "object_callbacks.h"
+#include "runtime.h"
+
+namespace art {
+
+namespace mirror {
+  class ClassLoader;
+}  // namespace mirror
+
+typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
+
+// Each loader has a ClassTable.
+class ClassTable {
+ public:
+  ClassTable();
+
+  // Used by image writer for checking.
+  bool Contains(mirror::Class* klass)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Freeze the current class tables by allocating a new table and never updating or modifying the
+  // existing tables. This helps prevent dirty pages caused by inserting after the zygote fork.
+  void FreezeSnapshot()
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of classes in previous snapshots.
+  size_t NumZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
+  // Returns the number of classes in the latest snapshot.
+  size_t NumNonZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
+  // Update a class in the table with the new class. Returns the existing class which was replaced.
+  mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns false if the callback told us to exit.
+  bool Visit(ClassVisitor* visitor, void* arg)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  mirror::Class* Lookup(const char* descriptor, size_t hash)
+      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+
+  void Insert(mirror::Class* klass)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+  void InsertWithHash(mirror::Class* klass, size_t hash)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns true if the class was found and removed, false otherwise.
+  bool Remove(const char* descriptor)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+ private:
+  class ClassDescriptorHashEquals {
+   public:
+    // Same class loader and descriptor.
+    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
+    bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
+        NO_THREAD_SAFETY_ANALYSIS;
+    // Same descriptor.
+    bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
+        NO_THREAD_SAFETY_ANALYSIS;
+    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
+  };
+  class GcRootEmptyFn {
+   public:
+    void MakeEmpty(GcRoot<mirror::Class>& item) const {
+      item = GcRoot<mirror::Class>();
+    }
+    bool IsEmpty(const GcRoot<mirror::Class>& item) const {
+      return item.IsNull();
+    }
+  };
+  // Hash set which hashes class descriptors and compares them for equality. All classes in a
+  // given ClassTable have the same class loader, so the loader is not part of the key.
+  typedef HashSet<GcRoot<mirror::Class>, GcRootEmptyFn, ClassDescriptorHashEquals,
+      ClassDescriptorHashEquals, TrackingAllocator<GcRoot<mirror::Class>, kAllocatorTagClassTable>>
+      ClassSet;
+
+  // TODO: shard lock to have one per class loader.
+  std::vector<ClassSet> classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CLASS_TABLE_H_
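
The header keeps the C-style ClassVisitor typedef, which ClassTable::Visit drives and which DumpAllClasses above consumes. A self-contained sketch of the same callback shape, with a hypothetical Class stand-in:

#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for mirror::Class.
struct Class { const char* descriptor; };

// Same shape as the ClassVisitor typedef above.
typedef bool (ClassVisitor)(Class* c, void* arg);

// Example visitor: counts classes via the void* side channel and returns
// true to keep walking, matching the "return false to exit" contract.
static bool CountingVisitor(Class* c, void* arg) {
  std::printf("visiting %s\n", c->descriptor);
  ++*static_cast<std::size_t*>(arg);
  return true;
}

int main() {
  Class classes[] = {{"Ljava/lang/Object;"}, {"Ljava/lang/String;"}};
  std::size_t count = 0;
  ClassVisitor* visitor = CountingVisitor;  // Function pointer, as in Visit().
  for (Class& c : classes) {
    if (!visitor(&c, &count)) {
      break;
    }
  }
  std::printf("visited %zu classes\n", count);
}
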
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index cef2510..3d3f7a1 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -20,6 +20,7 @@
 #include <jni.h>
 
 #include "base/macros.h"
+#include "base/mutex.h"
 #include "offsets.h"
 
 #define QUICK_ENTRYPOINT_OFFSET(ptr_size, x) \
@@ -71,6 +72,16 @@
                            Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 
+// Read barrier entrypoints.
+// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to this function directly.
+// For x86 and x86_64, compilers need a wrapper assembly function to handle the ABI mismatch.
+// This is the read barrier slow path for instance and static fields and reference-type arrays.
+// TODO: Currently the read barrier does not have a fast path for compilers to directly generate.
+// Ideally the slow path should only take one parameter "ref".
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Object* obj,
+                                              uint32_t offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_H_
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 60bbf4a..73d8ae7 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -145,7 +145,8 @@
   V(NewStringFromStringBuffer, void) \
   V(NewStringFromStringBuilder, void) \
 \
-  V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*)
+  V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
+  V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
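
quick_entrypoints_list.h is an X-macro list: each V(name, return_type, arg_types...) row is expanded by the including file, so adding the ReadBarrierSlow row grows QuickEntryPoints by one slot. A simplified sketch of the assumed expansion (the real macros and struct live in quick_entrypoints.h; the names below are illustrative):

#include <cstdint>

namespace mirror { class Object; }

// Simplified stand-in for the real QUICK_ENTRYPOINT_LIST X-macro, which has
// one V(...) row per entry point.
#define QUICK_ENTRYPOINT_LIST_SKETCH(V) \
  V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t)

// Expand each row into a "pName" function-pointer member; this is roughly
// how the runtime derives the QuickEntryPoints struct from the list.
struct QuickEntryPointsSketch {
#define ENTRYPOINT_MEMBER(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
  QUICK_ENTRYPOINT_LIST_SKETCH(ENTRYPOINT_MEMBER)
#undef ENTRYPOINT_MEMBER
};

// Stub with the entry point's signature; the runtime version does the real
// barrier work.
extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
                                              mirror::Object* /*obj*/,
                                              uint32_t /*offset*/) {
  return ref;
}

// Analogous to the per-architecture entry point initialization.
void InitEntryPoints(QuickEntryPointsSketch* qpoints) {
  qpoints->pReadBarrierSlow = artReadBarrierSlow;
}

entrypoints_order_test.cc below then pins the layout by checking that pReadBarrierSlow sits one pointer after pReadBarrierJni and ends the struct.
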
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 25a943a..0a1d806 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -557,4 +557,16 @@
   return -1;  // failure
 }
 
+// TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
+// take one parameter "ref", which is generated by the fast path.
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
+                                              mirror::Object* obj, uint32_t offset) {
+  DCHECK(kUseReadBarrier);
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
+  mirror::HeapReference<mirror::Object>* ref_addr =
+      reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
+  return ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, true>(obj, MemberOffset(offset),
+                                                                      ref_addr);
+}
+
 }  // namespace art
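
Per the comments above, the compiled fast path is expected to do the plain reference load and hand the result to the slow path, which for now ignores it and recomputes the reference from (obj, offset). A hedged C++ rendering of that call shape, with a stubbed slow path and a hypothetical PlainLoad helper:

#include <cstdint>

namespace mirror { class Object; }

// Hypothetical plain (unbarriered) load of a 32-bit heap reference,
// mirroring the raw_addr arithmetic in artReadBarrierSlow above.
static mirror::Object* PlainLoad(mirror::Object* obj, uint32_t offset) {
  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
  uint32_t compressed = *reinterpret_cast<uint32_t*>(raw_addr);
  return reinterpret_cast<mirror::Object*>(static_cast<uintptr_t>(compressed));
}

// Stub slow path: like the real one, it ignores `ref` for now and recomputes
// the reference; the runtime version applies the actual read barrier.
extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
                                              mirror::Object* obj,
                                              uint32_t offset) {
  (void)ref;
  return PlainLoad(obj, offset);
}

// Call shape a compiler emits for a reference field get under the read
// barrier: plain load first, then route the result through the slow path.
mirror::Object* LoadFieldWithReadBarrier(mirror::Object* obj,
                                         uint32_t offset) {
  return artReadBarrierSlow(PlainLoad(obj, offset), obj, offset);
}
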
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c05c935..f7a3cd5 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -311,8 +311,9 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierJni)
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierSlow)
             + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 };
diff --git a/runtime/oat.h b/runtime/oat.h
index ee2f3f6..29dd76c 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '7', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '8', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index 4f408dd..710c21f 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -47,9 +47,4 @@
 #error "Only one of Baker or Brooks can be enabled at a time."
 #endif
 
-// A placeholder marker to indicate places to add read barriers in the
-// assembly code. This is a development time aid and to be removed
-// after read barriers are added.
-#define THIS_LOAD_REQUIRES_READ_BARRIER
-
 #endif  // ART_RUNTIME_READ_BARRIER_C_H_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6949b0b..2b977af 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2304,6 +2304,7 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
diff --git a/test/441-checker-inliner/src/Main.java b/test/441-checker-inliner/src/Main.java
index 4db116a..c108a90 100644
--- a/test/441-checker-inliner/src/Main.java
+++ b/test/441-checker-inliner/src/Main.java
@@ -157,6 +157,31 @@
     return x;
   }
 
+  /// CHECK-START: int Main.returnAbs(int) intrinsics_recognition (before)
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  /// CHECK-START: int Main.returnAbs(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect intrinsic:MathAbsInt
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  private static int returnAbs(int i) {
+    return Math.abs(i);
+  }
+
+  /// CHECK-START: int Main.InlinedIntrinsicsAreStillIntrinsic() inliner (before)
+  /// CHECK-DAG:     <<ConstMinus1:i\d+>> IntConstant -1
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  /// CHECK-START: int Main.InlinedIntrinsicsAreStillIntrinsic() inliner (after)
+  /// CHECK-DAG:     <<ConstMinus1:i\d+>> IntConstant -1
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect intrinsic:MathAbsInt
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  public static int InlinedIntrinsicsAreStillIntrinsic() {
+    return returnAbs(-1);
+  }
 
   private static void returnVoid() {
     return;
@@ -238,5 +263,13 @@
     if (InlineWithControlFlow(false) != 2) {
       throw new Error();
     }
+
+    if (InlinedIntrinsicsAreStillIntrinsic() != 1) {
+      throw new Error();
+    }
+
+    if (returnAbs(-1) != 1) {
+      throw new Error();
+    }
   }
 }
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 992a8a6..d58f034 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -150,5 +150,12 @@
   result: EXEC_FAILED,
   modes: [device],
   names: ["org.apache.harmony.tests.java.lang.ClassTest#test_forNameLjava_lang_String"]
+},
+{
+  description: "TimeZoneTest.testAllDisplayNames times out, needs investigation",
+  result: EXEC_FAILED,
+  modes: [device],
+  names: ["libcore.java.util.TimeZoneTest.testAllDisplayNames"],
+  bug: 22786792
 }
 ]