Merge "ART: Implement X86 hard float (Quick/JNI/Baseline)"
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index cd9ed50..c792536 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -221,6 +221,10 @@
   art_cflags += -DART_HEAP_POISONING=1
 endif
 
+ifeq ($(ART_USE_READ_BARRIER),true)
+  art_cflags += -DART_USE_READ_BARRIER=1
+endif
+
 # Cflags for non-debug ART and ART tools.
 art_non_debug_cflags := \
   -O3
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d69447d..b234249 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -907,7 +907,6 @@
 }
 
 void ImageWriter::CopyAndFixupObjects() {
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "ImageWriter");
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // TODO: heap validation can't handle this fix up pass
   heap->DisableObjectValidation();
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index bf996a2..ba73828 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -135,6 +135,7 @@
     FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     // Check handle scope offset is within frame
     CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
+    // TODO: Insert the read barrier for this load.
     __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
                mr_conv->MethodRegister(), mirror::ArtMethod::DeclaringClassOffset());
     __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 43fd8bb..0a405c4 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -140,9 +140,7 @@
                                      size_t maximum_number_of_live_core_registers,
                                      size_t maximum_number_of_live_fp_registers,
                                      size_t number_of_out_slots) {
-  core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
-  fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
+  ComputeSpillMask();
   first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
 
   SetFrameSize(RoundUp(
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 85d18c0..45f02e5 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -129,6 +129,25 @@
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
   virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
 
+  // Computes the core and floating point spill masks: the allocated registers that are
+  // also callee-save. Virtual so that a backend can adjust the masks.
+  virtual void ComputeSpillMask() {
+    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
+  }
+
+  // Returns a bit mask with bit `registers[i]` set for each of the `length` entries of
+  // `registers`. Every entry must be in the range [0, 31].
+  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
+    uint32_t mask = 0;
+    for (size_t i = 0; i < length; ++i) {
+      // Shift an unsigned one: `1 << 31` would shift into the sign bit of a signed int.
+      mask |= (1u << registers[i]);
+    }
+    return mask;
+  }
+
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
   virtual InstructionSet GetInstructionSet() const = 0;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f4e4f5a..824663a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -50,6 +50,13 @@
 static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
+// We unconditionally allocate R5 to ensure we can do long operations
+// with baseline.
+static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreCalleeSaves[] =
+    { R5, R6, R7, R8, R10, R11, PC };
+static constexpr SRegister kFpuCalleeSaves[] =
+    { S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 };
 
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
  public:
@@ -374,20 +381,27 @@
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
                                    const ArmInstructionSetFeatures& isa_features,
                                    const CompilerOptions& compiler_options)
-    : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters,
-                    kNumberOfRegisterPairs, (1 << R6) | (1 << R7) | (1 << LR), 0, compiler_options),
+    : CodeGenerator(graph,
+                    kNumberOfCoreRegisters,
+                    kNumberOfSRegisters,
+                    kNumberOfRegisterPairs,
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+                                        arraysize(kCoreCalleeSaves)),
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+                                        arraysize(kFpuCalleeSaves)),
+                    compiler_options),
       block_labels_(graph->GetArena(), 0),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       assembler_(true),
       isa_features_(isa_features) {
-  // We unconditionally allocate R6 and R7 to ensure we can do long operations
-  // with baseline.
-  AddAllocatedRegister(Location::RegisterLocation(R6));
-  AddAllocatedRegister(Location::RegisterLocation(R7));
-  // Save the link register to mimic Quick.
-  AddAllocatedRegister(Location::RegisterLocation(LR));
+  // Save one extra register for baseline. Note that on thumb2, there is no easy
+  // instruction to restore just the PC, so this actually helps both baseline
+  // and non-baseline to save and restore at least two registers at entry and exit.
+  AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline));
+  // Save the PC register to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(PC));
 }
 
 Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
@@ -456,31 +470,17 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  // TODO: We currently don't use Quick's callee saved registers.
-  // We always save and restore R6 and R7 to make sure we can use three
-  // register pairs for long operations.
-  blocked_core_registers_[R4] = true;
-  blocked_core_registers_[R5] = true;
-  blocked_core_registers_[R8] = true;
-  blocked_core_registers_[R10] = true;
-  blocked_core_registers_[R11] = true;
+  if (is_baseline) {
+    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
+    }
 
-  blocked_fpu_registers_[S16] = true;
-  blocked_fpu_registers_[S17] = true;
-  blocked_fpu_registers_[S18] = true;
-  blocked_fpu_registers_[S19] = true;
-  blocked_fpu_registers_[S20] = true;
-  blocked_fpu_registers_[S21] = true;
-  blocked_fpu_registers_[S22] = true;
-  blocked_fpu_registers_[S23] = true;
-  blocked_fpu_registers_[S24] = true;
-  blocked_fpu_registers_[S25] = true;
-  blocked_fpu_registers_[S26] = true;
-  blocked_fpu_registers_[S27] = true;
-  blocked_fpu_registers_[S28] = true;
-  blocked_fpu_registers_[S29] = true;
-  blocked_fpu_registers_[S30] = true;
-  blocked_fpu_registers_[S31] = true;
+    blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
+
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
+  }
 
   UpdateBlockedPairRegisters();
 }
@@ -501,6 +501,28 @@
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
+static uint32_t LeastSignificantBit(uint32_t mask) {
+  // ffs() is 1-based (and returns 0 for an empty mask), so subtract 1 for a 0-based index.
+  return ffs(mask) - 1;
+}
+
+void CodeGeneratorARM::ComputeSpillMask() {
+  core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
+  // Floating point registers are saved and restored with vpush/vpop, which take a starting
+  // SRegister and the number of consecutive registers to transfer. The mask must therefore
+  // describe one contiguous range: also set the bits of registers that were not allocated
+  // but lie between the lowest and highest allocated callee-save registers.
+  if (fpu_spill_mask_ != 0) {
+    uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
+    uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
+    for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
+      fpu_spill_mask_ |= (1 << i);
+    }
+  }
+}
+
 void CodeGeneratorARM::GenerateFrameEntry() {
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
@@ -511,14 +533,24 @@
     RecordPcInfo(nullptr, 0);
   }
 
-  __ PushList(core_spill_mask_);
+  // PC is in the list of callee-save to mimic Quick, but we need to push
+  // LR at entry instead.
+  __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR);
+  if (fpu_spill_mask_ != 0) {
+    SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
+    __ vpushs(start_register, POPCOUNT(fpu_spill_mask_));
+  }
   __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize()));
   __ StoreToOffset(kStoreWord, R0, SP, 0);
 }
 
 void CodeGeneratorARM::GenerateFrameExit() {
   __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
-  __ PopList((core_spill_mask_ & (~(1 << LR))) | 1 << PC);
+  if (fpu_spill_mask_ != 0) {
+    SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
+    __ vpops(start_register, POPCOUNT(fpu_spill_mask_));
+  }
+  __ PopList(core_spill_mask_);  // Mask holds PC (not LR), so no LR->PC swap is needed.
 }
 
 void CodeGeneratorARM::Bind(HBasicBlock* block) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 46accfd..dd69e4d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -245,6 +245,8 @@
     return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
   }
 
+  void ComputeSpillMask() OVERRIDE;
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e60f8a5..6bc28ff 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -45,9 +45,7 @@
 static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
-static constexpr Register kFakeReturnRegister = Register(16);
-static constexpr Register kCoreCalleeSaves[] =
-    { RBX, RBP, R12, R13, R14, R15, kFakeReturnRegister };
+static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
 
 static constexpr int kC2ConditionMask = 0x400;
@@ -403,30 +401,24 @@
   return kX86_64WordSize;
 }
 
-static uint32_t ComputeCalleeSaveMask(const int* registers, size_t length) {
-  uint32_t mask = 0;
-  for (size_t i = 0, e = length; i < e; ++i) {
-    mask |= (1 << registers[i]);
-  }
-  return mask;
-}
-
 static constexpr int kNumberOfCpuRegisterPairs = 0;
+// Use a fake return address register to mimic Quick.
+static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
       : CodeGenerator(graph,
                       kNumberOfCpuRegisters,
                       kNumberOfFloatRegisters,
                       kNumberOfCpuRegisterPairs,
-                      ComputeCalleeSaveMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
-                                            arraysize(kCoreCalleeSaves)),
-                      ComputeCalleeSaveMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
-                                            arraysize(kFpuCalleeSaves)),
+                      ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+                                          arraysize(kCoreCalleeSaves))
+                          | (1 << kFakeReturnRegister),
+                      ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+                                          arraysize(kFpuCalleeSaves)),
                       compiler_options),
         block_labels_(graph->GetArena(), 0),
         location_builder_(graph, this),
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this) {
-  // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -492,7 +484,7 @@
 
   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
     Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+    if (allocated_registers_.ContainsCoreRegister(reg)) {
       __ pushq(CpuRegister(reg));
     }
   }
@@ -525,7 +517,7 @@
 
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+    if (allocated_registers_.ContainsCoreRegister(reg)) {
       __ popq(CpuRegister(reg));
     }
   }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index d104077..8a4b8c0 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -64,6 +64,7 @@
   gc/space/image_space.cc \
   gc/space/large_object_space.cc \
   gc/space/malloc_space.cc \
+  gc/space/region_space.cc \
   gc/space/rosalloc_space.cc \
   gc/space/space.cc \
   gc/space/zygote_space.cc \
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 53b9f4e..037c26e 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -46,4 +46,8 @@
 GENERATE_ALLOC_ENTRYPOINTS _bump_pointer_instrumented, BumpPointerInstrumented
 GENERATE_ALLOC_ENTRYPOINTS _tlab, TLAB
 GENERATE_ALLOC_ENTRYPOINTS _tlab_instrumented, TLABInstrumented
+GENERATE_ALLOC_ENTRYPOINTS _region, Region
+GENERATE_ALLOC_ENTRYPOINTS _region_instrumented, RegionInstrumented
+GENERATE_ALLOC_ENTRYPOINTS _region_tlab, RegionTLAB
+GENERATE_ALLOC_ENTRYPOINTS _region_tlab_instrumented, RegionTLABInstrumented
 .endm
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 32ee46c..c5e1914 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -863,6 +863,46 @@
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+
 TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 48f5e85..c865541 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -903,6 +903,46 @@
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+
 TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
diff --git a/runtime/arch/x86_64/registers_x86_64.h b/runtime/arch/x86_64/registers_x86_64.h
index 8b0dc07..dda1d5f 100644
--- a/runtime/arch/x86_64/registers_x86_64.h
+++ b/runtime/arch/x86_64/registers_x86_64.h
@@ -43,6 +43,7 @@
   R13 = 13,
   R14 = 14,
   R15 = 15,
+  kLastCpuRegister = 15,
   kNumberOfCpuRegisters = 16,
   kNoRegister = -1  // Signals an illegal register.
 };
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 17b2ac9..9453741 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -319,19 +319,24 @@
   exclusive_owner_ = 0;
 }
 
+// Returns whether the runtime is gone or shutting down, ignoring the lock requirement.
+static bool IsShuttingDown() NO_THREAD_SAFETY_ANALYSIS {
+  Runtime* runtime = Runtime::Current();
+  return runtime == nullptr || runtime->IsShuttingDownLocked();
+}
+
 Mutex::~Mutex() {
+  bool shutting_down = IsShuttingDown();
 #if ART_USE_FUTEXES
   if (state_.LoadRelaxed() != 0) {
-    Runtime* runtime = Runtime::Current();
-    bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
     LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
   } else {
-    CHECK_EQ(exclusive_owner_, 0U)  << "unexpectedly found an owner on unlocked mutex " << name_;
-    if (level_ != kMonitorLock) {
-      // Only check the lock level for non monitor locks since we may still have java threads
-      // waiting on monitors.
-      CHECK_EQ(num_contenders_.LoadSequentiallyConsistent(), 0)
-          << "unexpectedly found a contender on mutex " << name_;
+    if (exclusive_owner_ != 0) {
+      LOG(shutting_down ? WARNING : FATAL) << "unexpectedly found an owner on unlocked mutex "
+                                           << name_;
+    }
+    if (num_contenders_.LoadSequentiallyConsistent() != 0) {
+      LOG(shutting_down ? WARNING : FATAL) << "unexpectedly found a contender on mutex " << name_;
     }
   }
 #else
@@ -342,8 +347,6 @@
     errno = rc;
     // TODO: should we just not log at all if shutting down? this could be the logging mutex!
     MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
-    Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDownLocked();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_mutex_destroy failed for " << name_;
   }
 #endif
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 9c93cc6..745b209 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -60,6 +60,7 @@
   kThreadSuspendCountLock,
   kAbortLock,
   kJdwpSocketLock,
+  kRegionSpaceRegionLock,
   kReferenceQueueSoftReferencesLock,
   kReferenceQueuePhantomReferencesLock,
   kReferenceQueueFinalizerReferencesLock,
@@ -70,6 +71,7 @@
   kRosAllocBracketLock,
   kRosAllocBulkFreeLock,
   kAllocSpaceLock,
+  kBumpPointerSpaceBlockLock,
   kDexFileMethodInlinerLock,
   kDexFileToMethodInlinerMapLock,
   kMarkSweepMarkStackLock,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b0d55c3..b66dfeb 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -2697,9 +2697,6 @@
   CHECK(descriptor != nullptr);
 
   klass->SetClass(GetClassRoot(kJavaLangClass));
-  if (kUseBakerOrBrooksReadBarrier) {
-    klass->AssertReadBarrierPointer();
-  }
   uint32_t access_flags = dex_class_def.GetJavaAccessFlags();
   CHECK_EQ(access_flags & ~kAccJavaFlagsMask, 0U);
   klass->SetAccessFlags(access_flags);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index c63e2d7..1cbaf39 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -105,18 +105,17 @@
 
 jobject Dbg::TypeCache::Add(mirror::Class* t) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  int32_t hash_code = t->IdentityHashCode();
+  JNIEnv* const env = soa.Env();
+  ScopedLocalRef<jobject> local_ref(env, soa.AddLocalReference<jobject>(t));
+  const int32_t hash_code = soa.Decode<mirror::Class*>(local_ref.get())->IdentityHashCode();
   auto range = objects_.equal_range(hash_code);
   for (auto it = range.first; it != range.second; ++it) {
-    if (soa.Decode<mirror::Class*>(it->second) == t) {
+    if (soa.Decode<mirror::Class*>(it->second) == soa.Decode<mirror::Class*>(local_ref.get())) {
       // Found a matching weak global, return it.
       return it->second;
     }
   }
-  JNIEnv* env = soa.Env();
-  const jobject local_ref = soa.AddLocalReference<jobject>(t);
-  const jobject weak_global = env->NewWeakGlobalRef(local_ref);
-  env->DeleteLocalRef(local_ref);
+  const jobject weak_global = env->NewWeakGlobalRef(local_ref.get());  // No cached entry matched.
   objects_.insert(std::make_pair(hash_code, weak_global));
   return weak_global;
 }
@@ -4488,6 +4487,18 @@
         context.SetChunkOverhead(0);
         space->AsBumpPointerSpace()->Walk(BumpPointerSpaceCallback, &context);
         HeapChunkContext::HeapChunkJavaCallback(nullptr, nullptr, 0, &context);
+      } else if (space->IsRegionSpace()) {
+        heap->IncrementDisableMovingGC(self);
+        self->TransitionFromRunnableToSuspended(kSuspended);
+        ThreadList* tl = Runtime::Current()->GetThreadList();
+        tl->SuspendAll();
+        ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+        context.SetChunkOverhead(0);
+        space->AsRegionSpace()->Walk(BumpPointerSpaceCallback, &context);
+        HeapChunkContext::HeapChunkJavaCallback(nullptr, nullptr, 0, &context);
+        tl->ResumeAll();
+        self->TransitionFromSuspendedToRunnable();
+        heap->DecrementDisableMovingGC(self);
       } else {
         UNIMPLEMENTED(WARNING) << "Not counting objects in space " << *space;
       }
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 1fd1150..c049e3d 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -163,6 +163,8 @@
 GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(RosAlloc, gc::kAllocatorTypeRosAlloc)
 GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(BumpPointer, gc::kAllocatorTypeBumpPointer)
 GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(TLAB, gc::kAllocatorTypeTLAB)
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(Region, gc::kAllocatorTypeRegion)
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(RegionTLAB, gc::kAllocatorTypeRegionTLAB)
 
 #define GENERATE_ENTRYPOINTS(suffix) \
 extern "C" void* art_quick_alloc_array##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \
@@ -213,6 +215,8 @@
 GENERATE_ENTRYPOINTS(_rosalloc)
 GENERATE_ENTRYPOINTS(_bump_pointer)
 GENERATE_ENTRYPOINTS(_tlab)
+GENERATE_ENTRYPOINTS(_region)
+GENERATE_ENTRYPOINTS(_region_tlab)
 #endif
 
 static bool entry_points_instrumented = false;
@@ -247,6 +251,16 @@
       SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
       return;
     }
+    case gc::kAllocatorTypeRegion: {
+      CHECK(kMovingCollector);
+      SetQuickAllocEntryPoints_region(qpoints, entry_points_instrumented);
+      return;
+    }
+    case gc::kAllocatorTypeRegionTLAB: {
+      CHECK(kMovingCollector);
+      SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+      return;
+    }
     default:
       break;
   }
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 1313263..daa24c9 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -130,8 +130,11 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, held_mutexes, sizeof(void*));
-    EXPECT_OFFSET_DIFF(Thread, tlsPtr_.held_mutexes, Thread, wait_mutex_,
-                       sizeof(void*) * kLockLevelCount + sizeof(void*), thread_tlsptr_end);
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, held_mutexes, nested_signal_state,
+                        sizeof(void*) * kLockLevelCount);
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, nested_signal_state, flip_function, sizeof(void*));
+    EXPECT_OFFSET_DIFF(Thread, tlsPtr_.flip_function, Thread, wait_mutex_, sizeof(void*),
+                       thread_tlsptr_end);
   }
 
   void CheckInterpreterEntryPoints() {
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 94753d4..83f3ae1 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -340,7 +340,8 @@
   // TODO: check the GC maps to make sure it's an object.
   // Check that the class pointer inside the object is not null and is aligned.
   // TODO: Method might be not a heap address, and GetClass could fault.
-  mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
+  // No read barrier because method_obj may not be a real object.
+  mirror::Class* cls = method_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
   if (cls == nullptr) {
     VLOG(signals) << "not a class";
     return false;
@@ -440,4 +441,3 @@
 }
 
 }   // namespace art
-
diff --git a/runtime/gc/accounting/heap_bitmap-inl.h b/runtime/gc/accounting/heap_bitmap-inl.h
index 34c15c7..8fcc87d 100644
--- a/runtime/gc/accounting/heap_bitmap-inl.h
+++ b/runtime/gc/accounting/heap_bitmap-inl.h
@@ -105,6 +105,15 @@
   return nullptr;
 }
 
+inline LargeObjectBitmap* HeapBitmap::GetLargeObjectBitmap(const mirror::Object* obj) const {
+  for (const auto& bitmap : large_object_bitmaps_) {  // Return the first bitmap covering obj.
+    if (LIKELY(bitmap->HasAddress(obj))) {
+      return bitmap;
+    }
+  }
+  return nullptr;  // No large object bitmap has obj's address.
+}
+
 }  // namespace accounting
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index ca6dc46..245e074 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -27,6 +27,10 @@
 
 class Heap;
 
+namespace collector {
+  class ConcurrentCopying;
+}  // namespace collector
+
 namespace accounting {
 
 class HeapBitmap {
@@ -40,6 +44,7 @@
   bool AtomicTestAndSet(const mirror::Object* obj, const LargeObjectSetVisitor& visitor)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) ALWAYS_INLINE;
   ContinuousSpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) const;
+  LargeObjectBitmap* GetLargeObjectBitmap(const mirror::Object* obj) const;
 
   void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -78,6 +83,7 @@
       large_object_bitmaps_;
 
   friend class art::gc::Heap;
+  friend class art::gc::collector::ConcurrentCopying;
 };
 
 }  // namespace accounting
diff --git a/runtime/gc/accounting/read_barrier_table.h b/runtime/gc/accounting/read_barrier_table.h
new file mode 100644
index 0000000..84d5da3
--- /dev/null
+++ b/runtime/gc/accounting/read_barrier_table.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ACCOUNTING_READ_BARRIER_TABLE_H_
+#define ART_RUNTIME_GC_ACCOUNTING_READ_BARRIER_TABLE_H_
+
+#include "base/mutex.h"
+#include "gc/space/space.h"
+#include "globals.h"
+#include "mem_map.h"
+
+namespace art {
+namespace gc {
+namespace accounting {
+
+// Used to decide whether to take the read barrier fast/slow paths for
+// kUseTableLookupReadBarrier. If an entry is set, take the read
+// barrier slow path. There's an entry per region.
+class ReadBarrierTable {
+ public:
+  ReadBarrierTable() {  // Maps one byte-sized entry per kRegionSize of kHeapCapacity.
+    size_t capacity = static_cast<size_t>(kHeapCapacity / kRegionSize);  // Entry count.
+    DCHECK_EQ(kHeapCapacity / kRegionSize,
+              static_cast<uint64_t>(static_cast<size_t>(kHeapCapacity / kRegionSize)));
+    std::string error_msg;
+    MemMap* mem_map = MemMap::MapAnonymous("read barrier table", nullptr, capacity,
+                                           PROT_READ | PROT_WRITE, false, &error_msg);
+    CHECK(mem_map != nullptr && mem_map->Begin() != nullptr)
+        << "couldn't allocate read barrier table: " << error_msg;
+    mem_map_.reset(mem_map);  // The table takes ownership of the mapping.
+  }
+  void ClearForSpace(space::ContinuousSpace* space) {  // Zero entries for [Begin, Limit).
+    uint8_t* entry_start = EntryFromAddr(space->Begin());
+    uint8_t* entry_end = EntryFromAddr(space->Limit());
+    memset(reinterpret_cast<void*>(entry_start), 0, entry_end - entry_start);
+  }
+  void Clear(uint8_t* start_addr, uint8_t* end_addr) {  // Zero entries for [start, end).
+    DCHECK(IsValidHeapAddr(start_addr)) << start_addr;
+    DCHECK(IsValidHeapAddr(end_addr)) << end_addr;
+    DCHECK(IsAligned<kRegionSize>(start_addr));
+    DCHECK(IsAligned<kRegionSize>(end_addr));
+    uint8_t* entry_start = EntryFromAddr(start_addr);
+    uint8_t* entry_end = EntryFromAddr(end_addr);
+    memset(reinterpret_cast<void*>(entry_start), 0, entry_end - entry_start);
+  }
+  bool IsSet(const void* heap_addr) const {  // True if the entry for heap_addr is set.
+    DCHECK(IsValidHeapAddr(heap_addr)) << heap_addr;
+    uint8_t entry_value = *EntryFromAddr(heap_addr);
+    DCHECK(entry_value == 0 || entry_value == kSetEntryValue);
+    return entry_value == kSetEntryValue;
+  }
+  void ClearAll() {  // Clear every entry.
+    mem_map_->MadviseDontNeedAndZero();
+  }
+  void SetAll() {  // Set every entry to kSetEntryValue.
+    memset(mem_map_->Begin(), kSetEntryValue, mem_map_->Size());
+  }
+  bool IsAllCleared() const {  // Scans the whole table, one 32-bit word at a time.
+    for (uint32_t* p = reinterpret_cast<uint32_t*>(mem_map_->Begin());
+         p < reinterpret_cast<uint32_t*>(mem_map_->End()); ++p) {
+      if (*p != 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // This should match RegionSpace::kRegionSize. static_assert'ed in concurrent_copying.h.
+  static constexpr size_t kRegionSize = 1 * MB;
+
+ private:
+  static constexpr uint64_t kHeapCapacity = 4ULL * GB;  // low 4gb.
+  static constexpr uint8_t kSetEntryValue = 0x01;  // Any entry is either 0 or this value.
+
+  uint8_t* EntryFromAddr(const void* heap_addr) const {  // Entry index: heap_addr / kRegionSize.
+    DCHECK(IsValidHeapAddr(heap_addr)) << heap_addr;
+    uint8_t* entry_addr = mem_map_->Begin() + reinterpret_cast<uintptr_t>(heap_addr) / kRegionSize;
+    DCHECK(IsValidEntry(entry_addr)) << "heap_addr: " << heap_addr
+                                     << " entry_addr: " << reinterpret_cast<void*>(entry_addr);
+    return entry_addr;
+  }
+
+  bool IsValidHeapAddr(const void* heap_addr) const {
+#ifdef __LP64__
+    return reinterpret_cast<uint64_t>(heap_addr) < kHeapCapacity;
+#else
+    UNUSED(heap_addr);
+    return true;  // No range check on non-LP64 builds.
+#endif
+  }
+
+  bool IsValidEntry(const uint8_t* entry_addr) const {  // True if inside the mapped table.
+    uint8_t* begin = mem_map_->Begin();
+    uint8_t* end = mem_map_->End();
+    return entry_addr >= begin && entry_addr < end;
+  }
+
+  std::unique_ptr<MemMap> mem_map_;
+};
+
+}  // namespace accounting
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ACCOUNTING_READ_BARRIER_TABLE_H_
diff --git a/runtime/gc/allocator_type.h b/runtime/gc/allocator_type.h
index c6ebc73..f9a2ff6 100644
--- a/runtime/gc/allocator_type.h
+++ b/runtime/gc/allocator_type.h
@@ -30,6 +30,8 @@
   kAllocatorTypeDlMalloc,  // Use dlmalloc allocator, has entrypoints.
   kAllocatorTypeNonMoving,  // Special allocator for non moving objects, doesn't have entrypoints.
   kAllocatorTypeLOS,  // Large object space, also doesn't have entrypoints.
+  kAllocatorTypeRegion,
+  kAllocatorTypeRegionTLAB,
 };
 std::ostream& operator<<(std::ostream& os, const AllocatorType& rhs);
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 079eeba..5fa3c8b 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -16,10 +16,1627 @@
 
 #include "concurrent_copying.h"
 
+#include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/space/image_space.h"
+#include "gc/space/space.h"
+#include "intern_table.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/object-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+#include "well_known_classes.h"
+
 namespace art {
 namespace gc {
 namespace collector {
 
+ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix)
+    : GarbageCollector(heap,
+                       name_prefix + (name_prefix.empty() ? "" : " ") +
+                       "concurrent copying + mark sweep"),
+      region_space_(nullptr), gc_barrier_(new Barrier(0)), mark_queue_(2 * MB),
+      is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
+      heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0),
+      skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
+      rb_table_(heap_->GetReadBarrierTable()),
+      force_evacuate_all_(false) {
+  static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
+                "The region space size and the read barrier table region size must match");
+  cc_heap_bitmap_.reset(new accounting::HeapBitmap(heap));  // Owned by this collector.
+  {
+    Thread* self = Thread::Current();
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    // Cache the heap's mark bitmap so that we won't have to lock
+    // heap_bitmap_lock_ in Mark(), which could cause a nested lock on
+    // heap_bitmap_lock_ when GC causes a RB while doing GC, or a lock
+    // order violation (class_linker_lock_ and heap_bitmap_lock_).
+    heap_mark_bitmap_ = heap->GetMarkBitmap();
+  }
+}
+
+ConcurrentCopying::~ConcurrentCopying() {  // Empty body: nothing is released explicitly here.
+}
+
+void ConcurrentCopying::RunPhases() {
+  CHECK(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
+  CHECK(!is_active_);  // Aborts if a run is already active.
+  is_active_ = true;
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  {
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    InitializePhase();
+  }
+  FlipThreadRoots();  // Called without mutator_lock_ held; it asserts that itself.
+  {
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    MarkingPhase();
+  }
+  // Verify no from space refs. This causes a pause.
+  if (kEnableNoFromSpaceRefsVerification || kIsDebugBuild) {
+    TimingLogger::ScopedTiming split("(Paused)VerifyNoFromSpaceReferences", GetTimings());
+    ScopedPause pause(this);
+    CheckEmptyMarkQueue();  // Checked both before and after the verification.
+    if (kVerboseMode) {
+      LOG(INFO) << "Verifying no from-space refs";
+    }
+    VerifyNoFromSpaceReferences();
+    CheckEmptyMarkQueue();
+  }
+  {
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    ReclaimPhase();
+  }
+  FinishPhase();  // Runs outside the mutator_lock_ scopes above.
+  CHECK(is_active_);
+  is_active_ = false;
+}
+
+void ConcurrentCopying::BindBitmaps() {
+  Thread* self = Thread::Current();
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);  // Held for the whole function.
+  // Mark all of the spaces we never collect as immune.
+  for (const auto& space : heap_->GetContinuousSpaces()) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
+        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+      CHECK(space->IsZygoteSpace() || space->IsImageSpace());
+      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" :
+          "cc zygote space bitmap";
+      // TODO: try avoiding using bitmaps for image/zygote to save space.
+      accounting::ContinuousSpaceBitmap* bitmap =
+          accounting::ContinuousSpaceBitmap::Create(bitmap_name, space->Begin(), space->Capacity());
+      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
+      cc_bitmaps_.push_back(bitmap);
+    } else if (space == region_space_) {  // The region space gets a bitmap but is not immune.
+      accounting::ContinuousSpaceBitmap* bitmap =
+          accounting::ContinuousSpaceBitmap::Create("cc region space bitmap",
+                                                    space->Begin(), space->Capacity());
+      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
+      cc_bitmaps_.push_back(bitmap);
+      region_space_bitmap_ = bitmap;  // Also cached in its own member.
+    }
+  }
+}
+
+void ConcurrentCopying::InitializePhase() {
+  TimingLogger::ScopedTiming split("InitializePhase", GetTimings());
+  if (kVerboseMode) {
+    LOG(INFO) << "GC InitializePhase";
+    LOG(INFO) << "Region-space : " << reinterpret_cast<void*>(region_space_->Begin()) << "-"
+              << reinterpret_cast<void*>(region_space_->Limit());
+  }
+  CHECK(mark_queue_.IsEmpty());
+  immune_region_.Reset();  // Refilled by BindBitmaps() below.
+  bytes_moved_.StoreRelaxed(0);
+  objects_moved_.StoreRelaxed(0);
+  if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit ||
+      GetCurrentIteration()->GetGcCause() == kGcCauseForNativeAlloc ||
+      GetCurrentIteration()->GetClearSoftReferences()) {
+    force_evacuate_all_ = true;  // Explicit, native-alloc, or soft-ref-clearing GC.
+  } else {
+    force_evacuate_all_ = false;
+  }
+  BindBitmaps();
+  if (kVerboseMode) {
+    LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_;
+    LOG(INFO) << "Immune region: " << immune_region_.Begin() << "-" << immune_region_.End();
+    LOG(INFO) << "GC end of InitializePhase";
+  }
+}
+
+// Used to switch the thread roots of a thread from from-space refs to to-space refs.
+class ThreadFlipVisitor : public Closure {
+ public:
+  explicit ThreadFlipVisitor(ConcurrentCopying* concurrent_copying, bool use_tlab)
+      : concurrent_copying_(concurrent_copying), use_tlab_(use_tlab) {
+  }
+
+  virtual void Run(Thread* thread) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Note: self is not necessarily equal to thread since thread may be suspended.
+    Thread* self = Thread::Current();
+    CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
+        << thread->GetState() << " thread " << thread << " self " << self;
+    if (use_tlab_ && thread->HasTlab()) {
+      if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
+        // Read the thread-local object count first; this must come before the revoke.
+        size_t thread_local_objects = thread->GetThreadLocalObjectsAllocated();
+        concurrent_copying_->region_space_->RevokeThreadLocalBuffers(thread);
+        reinterpret_cast<Atomic<size_t>*>(&concurrent_copying_->from_space_num_objects_at_first_pause_)->
+            FetchAndAddSequentiallyConsistent(thread_local_objects);
+      } else {
+        concurrent_copying_->region_space_->RevokeThreadLocalBuffers(thread);
+      }
+    }
+    if (kUseThreadLocalAllocationStack) {
+      thread->RevokeThreadLocalAllocationStack();
+    }
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);  // Held while visiting the roots.
+    thread->VisitRoots(ConcurrentCopying::ProcessRootCallback, concurrent_copying_);
+    concurrent_copying_->GetBarrier().Pass(self);  // Pass the collector's barrier.
+  }
+
+ private:
+  ConcurrentCopying* const concurrent_copying_;
+  const bool use_tlab_;
+};
+
+// Called back from Runtime::FlipThreadRoots() during a pause.
+class FlipCallback : public Closure {
+ public:
+  explicit FlipCallback(ConcurrentCopying* concurrent_copying)
+      : concurrent_copying_(concurrent_copying) {
+  }
+
+  virtual void Run(Thread* thread) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ConcurrentCopying* cc = concurrent_copying_;
+    TimingLogger::ScopedTiming split("(Paused)FlipCallback", cc->GetTimings());
+    // Note: here thread must be the calling thread itself (checked right below).
+    Thread* self = Thread::Current();
+    CHECK(thread == self);
+    Locks::mutator_lock_->AssertExclusiveHeld(self);
+    cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_);
+    cc->SwapStacks(self);
+    if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
+      cc->RecordLiveStackFreezeSize(self);
+      cc->from_space_num_objects_at_first_pause_ = cc->region_space_->GetObjectsAllocated();
+      cc->from_space_num_bytes_at_first_pause_ = cc->region_space_->GetBytesAllocated();
+    }
+    cc->is_marking_ = true;  // Reset to false at the end of MarkingPhase().
+    if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
+      CHECK(Runtime::Current()->IsCompiler());
+      TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
+      Runtime::Current()->VisitTransactionRoots(ConcurrentCopying::ProcessRootCallback, cc);
+    }
+  }
+
+ private:
+  ConcurrentCopying* const concurrent_copying_;
+};
+
+// Switch threads from from-space to to-space refs. Forward/mark the thread roots.
+void ConcurrentCopying::FlipThreadRoots() {
+  TimingLogger::ScopedTiming split("FlipThreadRoots", GetTimings());
+  if (kVerboseMode) {
+    LOG(INFO) << "time=" << region_space_->Time();
+    region_space_->DumpNonFreeRegions(LOG(INFO));
+  }
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  gc_barrier_->Init(self, 0);
+  ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);  // Run for the threads.
+  FlipCallback flip_callback(this);  // Run on this thread; it CHECKs thread == self.
+  size_t barrier_count = Runtime::Current()->FlipThreadRoots(
+      &thread_flip_visitor, &flip_callback, this);
+  {
+    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+    gc_barrier_->Increment(self, barrier_count);  // Wait for the threads to pass the barrier.
+  }
+  is_asserting_to_space_invariant_ = true;
+  QuasiAtomic::ThreadFenceForConstructor();
+  if (kVerboseMode) {
+    LOG(INFO) << "time=" << region_space_->Time();
+    region_space_->DumpNonFreeRegions(LOG(INFO));
+    LOG(INFO) << "GC end of FlipThreadRoots";
+  }
+}
+
+void ConcurrentCopying::SwapStacks(Thread* self) {
+  heap_->SwapStacks(self);  // Pure delegation to the heap.
+}
+
+void ConcurrentCopying::RecordLiveStackFreezeSize(Thread* self) {
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  live_stack_freeze_size_ = heap_->GetLiveStack()->Size();  // Snapshot of the live stack size.
+}
+
+// Used to visit objects in the immune spaces.
+class ConcurrentCopyingImmuneSpaceObjVisitor {
+ public:
+  explicit ConcurrentCopyingImmuneSpaceObjVisitor(ConcurrentCopying* cc)
+      : collector_(cc) {}
+
+  void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    DCHECK(obj != nullptr);
+    DCHECK(collector_->immune_region_.ContainsObject(obj));
+    accounting::ContinuousSpaceBitmap* cc_bitmap =
+        collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
+    DCHECK(cc_bitmap != nullptr)
+        << "An immune space object must have a bitmap";
+    if (kIsDebugBuild) {
+      DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj))
+          << "Immune space object must be already marked";
+    }
+    // Try to turn a white object gray. This may or may not succeed, which is ok.
+    if (kUseBakerReadBarrier) {
+      obj->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+    }
+    if (cc_bitmap->AtomicTestAndSet(obj)) {
+      // Already marked. Do nothing.
+    } else {
+      // Newly marked. It must be gray by now (Baker only); push it onto the mark stack.
+      CHECK(!kUseBakerReadBarrier || obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+      collector_->PushOntoMarkStack<true>(obj);
+    }
+  }
+
+ private:
+  ConcurrentCopying* collector_;
+};
+
+class EmptyCheckpoint : public Closure {
+ public:
+  explicit EmptyCheckpoint(ConcurrentCopying* concurrent_copying)
+      : concurrent_copying_(concurrent_copying) {
+  }
+
+  virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
+    // Note: self is not necessarily equal to thread since thread may be suspended.
+    Thread* self = Thread::Current();
+    CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
+        << thread->GetState() << " thread " << thread << " self " << self;
+    concurrent_copying_->GetBarrier().Pass(self);  // Only passes the barrier; no other work.
+  }
+
+ private:
+  ConcurrentCopying* const concurrent_copying_;
+};
+
+// Concurrently mark roots that are guarded by read barriers and process the mark stack.
+void ConcurrentCopying::MarkingPhase() {
+  TimingLogger::ScopedTiming split("MarkingPhase", GetTimings());
+  if (kVerboseMode) {
+    LOG(INFO) << "GC MarkingPhase";
+  }
+  {
+    // Mark the image root. The WB-based collectors do not need to
+    // scan the image objects from roots by relying on the card table,
+    // but it's necessary for the RB to-space invariant to hold.
+    TimingLogger::ScopedTiming split1("VisitImageRoots", GetTimings());
+    gc::space::ImageSpace* image = heap_->GetImageSpace();
+    if (image != nullptr) {
+      mirror::ObjectArray<mirror::Object>* image_root = image->GetImageHeader().GetImageRoots();
+      mirror::Object* marked_image_root = Mark(image_root);
+      CHECK_EQ(image_root, marked_image_root) << "An image object does not move";
+      if (ReadBarrier::kEnableToSpaceInvariantChecks) {
+        AssertToSpaceInvariant(nullptr, MemberOffset(0), marked_image_root);
+      }
+    }
+  }
+  {
+    TimingLogger::ScopedTiming split2("VisitConstantRoots", GetTimings());
+    Runtime::Current()->VisitConstantRoots(ProcessRootCallback, this);
+  }
+  {
+    TimingLogger::ScopedTiming split3("VisitInternTableRoots", GetTimings());
+    Runtime::Current()->GetInternTable()->VisitRoots(ProcessRootCallback,
+                                                     this, kVisitRootFlagAllRoots);
+  }
+  {
+    TimingLogger::ScopedTiming split4("VisitClassLinkerRoots", GetTimings());
+    Runtime::Current()->GetClassLinker()->VisitRoots(ProcessRootCallback,
+                                                     this, kVisitRootFlagAllRoots);
+  }
+  {
+    // TODO: don't visit the transaction roots if it's not active.
+    TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
+    Runtime::Current()->VisitNonThreadRoots(ProcessRootCallback, this);
+  }
+
+  // Immune spaces: visit every object marked in each space's live bitmap.
+  for (auto& space : heap_->GetContinuousSpaces()) {
+    if (immune_region_.ContainsSpace(space)) {
+      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      ConcurrentCopyingImmuneSpaceObjVisitor visitor(this);
+      live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                    reinterpret_cast<uintptr_t>(space->Limit()),
+                                    visitor);
+    }
+  }
+
+  Thread* self = Thread::Current();
+  {
+    TimingLogger::ScopedTiming split6("ProcessMarkStack", GetTimings());
+    // Process the mark stack and issue an empty check point. If the
+    // mark stack is still empty after the check point, we're
+    // done. Otherwise, repeat.
+    ProcessMarkStack();
+    size_t count = 0;  // Number of empty check points issued; only used for logging.
+    while (!ProcessMarkStack()) {
+      ++count;
+      if (kVerboseMode) {
+        LOG(INFO) << "Issue an empty check point. " << count;
+      }
+      IssueEmptyCheckpoint();
+    }
+    // Need to ensure the mark stack is empty before reference
+    // processing to get rid of non-reference gray objects.
+    CheckEmptyMarkQueue();
+    // Enable the GetReference slow path and disallow access to the system weaks.
+    GetHeap()->GetReferenceProcessor()->EnableSlowPath();
+    Runtime::Current()->DisallowNewSystemWeaks();
+    QuasiAtomic::ThreadFenceForConstructor();
+    // Lock-unlock the system weak locks so that there's no thread in
+    // the middle of accessing system weaks.
+    Runtime::Current()->EnsureNewSystemWeaksDisallowed();
+    // Note: Do not issue a checkpoint from here to the
+    // SweepSystemWeaks call or else a deadlock due to
+    // WaitHoldingLocks() would occur.
+    if (kVerboseMode) {
+      LOG(INFO) << "Enabled the ref proc slow path & disabled access to system weaks.";
+      LOG(INFO) << "ProcessReferences";
+    }
+    ProcessReferences(self, true);
+    CheckEmptyMarkQueue();
+    if (kVerboseMode) {
+      LOG(INFO) << "SweepSystemWeaks";
+    }
+    SweepSystemWeaks(self);
+    if (kVerboseMode) {
+      LOG(INFO) << "SweepSystemWeaks done";
+    }
+    // Because hash_set::Erase() can call the hash function for
+    // arbitrary elements in the weak intern table in
+    // InternTable::Table::SweepWeaks(), the above SweepSystemWeaks()
+    // call may have marked some objects (strings) alive. So process
+    // the mark stack here once again.
+    ProcessMarkStack();
+    CheckEmptyMarkQueue();
+    // Disable marking.
+    if (kUseTableLookupReadBarrier) {
+      heap_->rb_table_->ClearAll();
+      DCHECK(heap_->rb_table_->IsAllCleared());
+    }
+    is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(1);  // See PushOntoMarkStack.
+    is_marking_ = false;
+    if (kVerboseMode) {
+      LOG(INFO) << "AllowNewSystemWeaks";
+    }
+    Runtime::Current()->AllowNewSystemWeaks();
+    CheckEmptyMarkQueue();
+  }
+
+  if (kVerboseMode) {
+    LOG(INFO) << "GC end of MarkingPhase";
+  }
+}
+
+void ConcurrentCopying::IssueEmptyCheckpoint() {
+  Thread* self = Thread::Current();
+  EmptyCheckpoint check_point(this);
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  gc_barrier_->Init(self, 0);
+  size_t barrier_count = thread_list->RunCheckpoint(&check_point);
+  // Release the shared mutator lock, then wait for all mutator threads to pass the barrier.
+  Locks::mutator_lock_->SharedUnlock(self);
+  {
+    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+    gc_barrier_->Increment(self, barrier_count);
+  }
+  Locks::mutator_lock_->SharedLock(self);  // Re-acquire before returning to the caller.
+}
+
+mirror::Object* ConcurrentCopying::PopOffMarkStack() {
+  return mark_queue_.Dequeue();  // The "mark stack" is backed by mark_queue_.
+}
+
+template<bool kThreadSafe>
+void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) {
+  CHECK_EQ(is_mark_queue_push_disallowed_.LoadRelaxed(), 0)  // Set to 1 when marking ends.
+      << " " << to_ref << " " << PrettyTypeOf(to_ref);
+  if (kThreadSafe) {
+    CHECK(mark_queue_.Enqueue(to_ref)) << "Mark queue overflow";
+  } else {
+    CHECK(mark_queue_.EnqueueThreadUnsafe(to_ref)) << "Mark queue overflow";
+  }
+}
+
+accounting::ObjectStack* ConcurrentCopying::GetAllocationStack() {
+  return heap_->allocation_stack_.get();  // Raw pointer to the heap's allocation stack.
+}
+
+accounting::ObjectStack* ConcurrentCopying::GetLiveStack() {
+  return heap_->live_stack_.get();  // Raw pointer to the heap's live stack.
+}
+
+inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
+  DCHECK(region_space_->IsInFromSpace(from_ref));
+  LockWord lw = from_ref->GetLockWord(false);  // The forwarding address lives in the lock word.
+  if (lw.GetState() == LockWord::kForwardingAddress) {
+    mirror::Object* fwd_ptr = reinterpret_cast<mirror::Object*>(lw.ForwardingAddress());
+    CHECK(fwd_ptr != nullptr);
+    return fwd_ptr;
+  } else {
+    return nullptr;  // The lock word holds no forwarding address.
+  }
+}
+
+inline void ConcurrentCopying::SetFwdPtr(mirror::Object* from_ref, mirror::Object* to_ref) {
+  DCHECK(region_space_->IsInFromSpace(from_ref));
+  DCHECK(region_space_->IsInToSpace(to_ref) || heap_->GetNonMovingSpace()->HasAddress(to_ref));
+  LockWord lw = from_ref->GetLockWord(false);
+  DCHECK_NE(lw.GetState(), LockWord::kForwardingAddress);  // Must not be forwarded already.
+  from_ref->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref)), false);
+}
+
+// The following visitors are used to verify that there are no
+// references to the from-space left after marking.
+class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor {
+ public:
+  explicit ConcurrentCopyingVerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    if (ref == nullptr) {
+      // A null ref has nothing to verify.
+      return;
+    }
+    collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+    if (kUseBakerReadBarrier) {  // Additionally check the read barrier pointer.
+      if (collector_->RegionSpace()->IsInToSpace(ref)) {
+        CHECK(ref->GetReadBarrierPointer() == nullptr)
+            << "To-space ref " << ref << " " << PrettyTypeOf(ref)
+            << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
+      } else {
+        CHECK(ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
+              (ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
+               collector_->IsOnAllocStack(ref)))
+            << "Non-moving/unevac from space ref " << ref << " " << PrettyTypeOf(ref)
+            << " has non-black rb_ptr " << ref->GetReadBarrierPointer()
+            << " but isn't on the alloc stack (and has white rb_ptr)."
+            << " Is it in the non-moving space="
+            << (collector_->GetHeap()->GetNonMovingSpace()->HasAddress(ref));
+      }
+    }
+  }
+
+  static void RootCallback(mirror::Object** root, void *arg, const RootInfo& /*root_info*/)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
+    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor visitor(collector);
+    DCHECK(root != nullptr);
+    visitor(*root);  // Verify the object the root slot points to.
+  }
+
+ private:
+  ConcurrentCopying* collector_;
+};
+
+class ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor {
+ public:
+  explicit ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    mirror::Object* ref =  // Loaded without a read barrier.
+        obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(offset);
+    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor visitor(collector_);
+    visitor(ref);
+  }
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+    this->operator()(ref, mirror::Reference::ReferentOffset(), false);  // Check the referent.
+  }
+
+ private:
+  ConcurrentCopying* collector_;
+};
+
+class ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor {
+ public:
+  explicit ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+  void operator()(mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ObjectCallback(obj, collector_);  // Functor form forwards to the static callback.
+  }
+  static void ObjectCallback(mirror::Object* obj, void *arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    CHECK(obj != nullptr);
+    ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
+    space::RegionSpace* region_space = collector->RegionSpace();
+    CHECK(!region_space->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
+    ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor visitor(collector);
+    obj->VisitReferences<true>(visitor, visitor);  // One visitor serves both callbacks.
+    if (kUseBakerReadBarrier) {
+      if (collector->RegionSpace()->IsInToSpace(obj)) {
+        CHECK(obj->GetReadBarrierPointer() == nullptr)
+            << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
+      } else {
+        CHECK(obj->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
+              (obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
+               collector->IsOnAllocStack(obj)))
+            << "Non-moving space/unevac from space ref " << obj << " " << PrettyTypeOf(obj)
+            << " has non-black rb_ptr " << obj->GetReadBarrierPointer()
+            << " but isn't on the alloc stack (and has white rb_ptr). Is it in the non-moving space="
+            << (collector->GetHeap()->GetNonMovingSpace()->HasAddress(obj));
+      }
+    }
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+};
+
+// Verify there's no from-space references left after the marking phase.
+void ConcurrentCopying::VerifyNoFromSpaceReferences() {
+  Thread* self = Thread::Current();
+  DCHECK(Locks::mutator_lock_->IsExclusiveHeld(self));  // RunPhases calls this in a pause.
+  ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor visitor(this);
+  // Roots.
+  {
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    Runtime::Current()->VisitRoots(
+        ConcurrentCopyingVerifyNoFromSpaceRefsVisitor::RootCallback, this);
+  }
+  // The to-space.
+  region_space_->WalkToSpace(ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor::ObjectCallback,
+                             this);
+  // Non-moving spaces.
+  {
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    heap_->GetMarkBitmap()->Visit(visitor);
+  }
+  // The alloc stack.
+  {
+    ConcurrentCopyingVerifyNoFromSpaceRefsVisitor ref_visitor(this);
+    for (mirror::Object** it = heap_->allocation_stack_->Begin(),
+             **end = heap_->allocation_stack_->End(); it < end; ++it) {
+      mirror::Object* obj = *it;
+      if (obj != nullptr && obj->GetClass() != nullptr) {  // Skip null slots and null classes.
+        // TODO: need to call this only if obj is alive?
+        ref_visitor(obj);
+        visitor(obj);
+      }
+    }
+  }
+  // TODO: LOS. But only refs in LOS are classes.
+}
+
+// The following visitors are used to assert the to-space invariant.
+class ConcurrentCopyingAssertToSpaceInvariantRefsVisitor {
+ public:
+  explicit ConcurrentCopyingAssertToSpaceInvariantRefsVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    if (ref == nullptr) {
+      // A null ref has nothing to assert.
+      return;
+    }
+    collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+  }
+  static void RootCallback(mirror::Object** root, void *arg, const RootInfo& /*root_info*/)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ConcurrentCopying* collector = reinterpret_cast<ConcurrentCopying*>(arg);
+    ConcurrentCopyingAssertToSpaceInvariantRefsVisitor visitor(collector);
+    DCHECK(root != nullptr);
+    visitor(*root);  // Check the object the root slot points to.
+  }
+
+ private:
+  ConcurrentCopying* collector_;
+};
+
+// Field visitor that asserts the to-space invariant on each reference field of an object.
+class ConcurrentCopyingAssertToSpaceInvariantFieldVisitor {
+ public:
+  explicit ConcurrentCopyingAssertToSpaceInvariantFieldVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  // Reads the field at offset without a read barrier and checks the loaded reference.
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    ConcurrentCopyingAssertToSpaceInvariantRefsVisitor ref_checker(collector_);
+    ref_checker(
+        obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(offset));
+  }
+
+  // Overload taking (klass, ref): only checks that klass is a reference class.
+  void operator()(mirror::Class* klass, mirror::Reference* /* ref */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+  }
+
+ private:
+  ConcurrentCopying* collector_;
+};
+
+// Object visitor that asserts the to-space invariant on an object and on all of its
+// reference fields.
+class ConcurrentCopyingAssertToSpaceInvariantObjectVisitor {
+ public:
+  explicit ConcurrentCopyingAssertToSpaceInvariantObjectVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  // Functor form; forwards to ObjectCallback() with this visitor's collector.
+  void operator()(mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ObjectCallback(obj, collector_);
+  }
+
+  // Callback form: arg is the ConcurrentCopying collector. obj must be non-null and must not
+  // be in the from-space.
+  static void ObjectCallback(mirror::Object* obj, void *arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    CHECK(obj != nullptr);
+    ConcurrentCopying* cc = reinterpret_cast<ConcurrentCopying*>(arg);
+    CHECK(!cc->RegionSpace()->IsInFromSpace(obj)) << "Scanning object " << obj << " in from space";
+    // First the object itself, then every reference field it holds.
+    cc->AssertToSpaceInvariant(nullptr, MemberOffset(0), obj);
+    ConcurrentCopyingAssertToSpaceInvariantFieldVisitor field_checker(cc);
+    obj->VisitReferences<true>(field_checker, field_checker);
+  }
+
+ private:
+  ConcurrentCopying* collector_;
+};
+
+bool ConcurrentCopying::ProcessMarkStack() {  // Pops and scans every object on the mark stack.
+  if (kVerboseMode) {
+    LOG(INFO) << "ProcessMarkStack. ";
+  }
+  size_t count = 0;  // Number of objects popped during this call.
+  mirror::Object* to_ref;
+  while ((to_ref = PopOffMarkStack()) != nullptr) {  // A null pop ends the loop.
+    ++count;
+    DCHECK(!region_space_->IsInFromSpace(to_ref));
+    if (kUseBakerReadBarrier) {  // A popped object is expected to be gray.
+      DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
+          << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+          << " is_marked=" << IsMarked(to_ref);
+    }
+    // Scan ref fields.
+    Scan(to_ref);
+    // Mark the gray ref as white or black. It is expected to still be gray after the scan.
+    if (kUseBakerReadBarrier) {
+      DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
+          << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+          << " is_marked=" << IsMarked(to_ref);
+    }
+    if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
+        to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
+        !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) {
+      // Leave References gray so that GetReferent() will trigger RB. Such a ref must be enqueued.
+      CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+    } else {
+      if (kUseBakerReadBarrier) {
+        if (region_space_->IsInToSpace(to_ref)) {
+          // If to-space, change from gray to white.
+          bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                             ReadBarrier::WhitePtr());
+          CHECK(success) << "Must succeed as we won the race.";
+          CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+        } else {
+          // If non-moving space/unevac from space, change from gray
+          // to black. We can't change gray to white because it's not
+          // safe to use CAS if two threads change values in opposite
+          // directions (A->B and B->A). So, we change it to black to
+          // indicate non-moving objects that have been marked
+          // through. Note we'd need to change from black to white
+          // later (concurrently).
+          bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                             ReadBarrier::BlackPtr());
+          CHECK(success) << "Must succeed as we won the race.";
+          CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+        }
+      }
+    }
+    if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {  // Optional re-check.
+      ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
+      visitor(to_ref);
+    }
+  }
+  // Return true if the stack was empty, i.e. no object was popped by this call.
+  return count == 0;
+}
+
+// Aborts if the mark queue still holds objects, logging every leftover entry first.
+void ConcurrentCopying::CheckEmptyMarkQueue() {
+  if (mark_queue_.IsEmpty()) {
+    return;
+  }
+  // Drain the queue so that each offending object shows up in the log before the abort.
+  do {
+    mirror::Object* leftover = mark_queue_.Dequeue();
+    if (!kUseBakerReadBarrier) {
+      LOG(INFO) << "On mark queue : " << leftover << " " << PrettyTypeOf(leftover)
+                << " is_marked=" << IsMarked(leftover);
+    } else {
+      mirror::Object* rb_ptr = leftover->GetReadBarrierPointer();
+      LOG(INFO) << "On mark queue : " << leftover << " " << PrettyTypeOf(leftover)
+                << " rb_ptr=" << rb_ptr << " is_marked=" << IsMarked(leftover);
+    }
+  } while (!mark_queue_.IsEmpty());
+  LOG(FATAL) << "mark queue is not empty";
+}
+
+// Sweeps the runtime's system weaks using IsMarkedCallback, timed as "SweepSystemWeaks" and
+// with the heap bitmap lock held for reading.
+void ConcurrentCopying::SweepSystemWeaks(Thread* self) {
+  TimingLogger::ScopedTiming timing("SweepSystemWeaks", GetTimings());
+  ReaderMutexLock bitmap_reader_lock(self, *Locks::heap_bitmap_lock_);
+  Runtime* const runtime = Runtime::Current();
+  runtime->SweepSystemWeaks(IsMarkedCallback, this);
+}
+
+// Marks the live stack as live, then sweeps every continuous mem-map alloc space other than
+// the region space and the immune spaces, and finally the large object space.
+void ConcurrentCopying::Sweep(bool swap_bitmaps) {
+  {
+    TimingLogger::ScopedTiming mark_timing("MarkStackAsLive", GetTimings());
+    accounting::ObjectStack* stack = heap_->GetLiveStack();
+    if (kEnableFromSpaceAccountingCheck) {
+      CHECK_GE(live_stack_freeze_size_, stack->Size());
+    }
+    heap_->MarkAllocStackAsLive(stack);
+    stack->Reset();
+  }
+  CHECK(mark_queue_.IsEmpty());
+  TimingLogger::ScopedTiming sweep_timing("Sweep", GetTimings());
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (!space->IsContinuousMemMapAllocSpace()) {
+      continue;
+    }
+    space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+    // The region space and the immune spaces are not swept here.
+    if (space == region_space_ || immune_region_.ContainsSpace(space)) {
+      continue;
+    }
+    const char* timing_label =
+        alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace";
+    TimingLogger::ScopedTiming space_timing(timing_label, GetTimings());
+    RecordFree(alloc_space->Sweep(swap_bitmaps));
+  }
+  SweepLargeObjects(swap_bitmaps);
+}
+
+// Sweeps the large object space and records what was freed, timed as "SweepLargeObjects".
+void ConcurrentCopying::SweepLargeObjects(bool swap_bitmaps) {
+  TimingLogger::ScopedTiming timing("SweepLargeObjects", GetTimings());
+  space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
+  RecordFreeLOS(los->Sweep(swap_bitmaps));
+}
+
+class ConcurrentCopyingClearBlackPtrsVisitor {  // Turns marked black objects white.
+ public:
+  explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
+      : collector_(cc) {}
+  void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    DCHECK(obj != nullptr);
+    CHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;  // Must be marked.
+    CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;  // Black on entry.
+    obj->SetReadBarrierPointer(ReadBarrier::WhitePtr());  // Reset to white.
+    CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;  // Verify the store.
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+};
+
+// Clear the black ptrs in non-moving objects back to white. Requires the Baker read barrier.
+void ConcurrentCopying::ClearBlackPtrs() {
+  CHECK(kUseBakerReadBarrier);
+  TimingLogger::ScopedTiming split("ClearBlackPtrs", GetTimings());
+  ConcurrentCopyingClearBlackPtrsVisitor visitor(this);
+  for (auto& space : heap_->GetContinuousSpaces()) {
+    if (space == region_space_) {  // The region space is not handled by this loop.
+      continue;
+    }
+    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    if (kVerboseMode) {
+      LOG(INFO) << "ClearBlackPtrs: " << *space << " bitmap: " << *mark_bitmap;
+    }
+    mark_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  visitor);  // Visits each marked object in [Begin, Limit).
+  }
+  space::LargeObjectSpace* large_object_space = heap_->GetLargeObjectsSpace();
+  large_object_space->GetMarkBitmap()->VisitMarkedRange(
+      reinterpret_cast<uintptr_t>(large_object_space->Begin()),
+      reinterpret_cast<uintptr_t>(large_object_space->End()),
+      visitor);  // Same visitor over the marked large objects.
+  // Objects on the allocation stack? With invariant checks or a debug build, verify them.
+  if (ReadBarrier::kEnableReadBarrierInvariantChecks || kIsDebugBuild) {
+    size_t count = GetAllocationStack()->Size();
+    mirror::Object** it = GetAllocationStack()->Begin();
+    mirror::Object** end = GetAllocationStack()->End();
+    for (size_t i = 0; i < count; ++i, ++it) {
+      CHECK(it < end);
+      mirror::Object* obj = *it;
+      if (obj != nullptr) {
+        // Must have been cleared above.
+        CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr()) << obj;
+      }
+    }
+  }
+}
+
+void ConcurrentCopying::ReclaimPhase() {  // Records freed memory and sweeps after marking.
+  TimingLogger::ScopedTiming split("ReclaimPhase", GetTimings());
+  if (kVerboseMode) {
+    LOG(INFO) << "GC ReclaimPhase";
+  }
+  Thread* self = Thread::Current();
+
+  {
+    // Double-check that the mark stack is empty.
+    // Note: need to set this after VerifyNoFromSpaceRef().
+    is_asserting_to_space_invariant_ = false;
+    QuasiAtomic::ThreadFenceForConstructor();
+    if (kVerboseMode) {
+      LOG(INFO) << "Issue an empty check point. ";
+    }
+    IssueEmptyCheckpoint();
+    // Disable the check.
+    is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(0);
+    CheckEmptyMarkQueue();  // Aborts if anything is still queued.
+  }
+
+  {
+    // Record freed objects: from-space totals minus what was moved out of the from-space.
+    TimingLogger::ScopedTiming split2("RecordFree", GetTimings());
+    // Don't include thread-locals that are in the to-space.
+    uint64_t from_bytes = region_space_->GetBytesAllocatedInFromSpace();
+    uint64_t from_objects = region_space_->GetObjectsAllocatedInFromSpace();
+    uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
+    uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
+    uint64_t to_bytes = bytes_moved_.LoadSequentiallyConsistent();
+    uint64_t to_objects = objects_moved_.LoadSequentiallyConsistent();
+    if (kEnableFromSpaceAccountingCheck) {
+      CHECK_EQ(from_space_num_objects_at_first_pause_, from_objects + unevac_from_objects);
+      CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
+    }
+    CHECK_LE(to_objects, from_objects);  // Cannot have moved more than the from-space held.
+    CHECK_LE(to_bytes, from_bytes);
+    int64_t freed_bytes = from_bytes - to_bytes;  // Non-negative per the checks above.
+    int64_t freed_objects = from_objects - to_objects;
+    if (kVerboseMode) {
+      LOG(INFO) << "RecordFree:"
+                << " from_bytes=" << from_bytes << " from_objects=" << from_objects
+                << " unevac_from_bytes=" << unevac_from_bytes << " unevac_from_objects=" << unevac_from_objects
+                << " to_bytes=" << to_bytes << " to_objects=" << to_objects
+                << " freed_bytes=" << freed_bytes << " freed_objects=" << freed_objects
+                << " from_space size=" << region_space_->FromSpaceSize()
+                << " unevac_from_space size=" << region_space_->UnevacFromSpaceSize()
+                << " to_space size=" << region_space_->ToSpaceSize();
+      LOG(INFO) << "(before) num_bytes_allocated=" << heap_->num_bytes_allocated_.LoadSequentiallyConsistent();
+    }
+    RecordFree(ObjectBytePair(freed_objects, freed_bytes));
+    if (kVerboseMode) {
+      LOG(INFO) << "(after) num_bytes_allocated=" << heap_->num_bytes_allocated_.LoadSequentiallyConsistent();
+    }
+  }
+
+  {
+    TimingLogger::ScopedTiming split3("ComputeUnevacFromSpaceLiveRatio", GetTimings());
+    ComputeUnevacFromSpaceLiveRatio();  // Uses region_space_bitmap_, which is reset below.
+  }
+
+  {
+    TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
+    region_space_->ClearFromSpace();
+  }
+
+  {
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);  // Held for all the bitmap work below.
+    if (kUseBakerReadBarrier) {
+      ClearBlackPtrs();  // Black -> white before sweeping.
+    }
+    Sweep(false);  // false: Sweep() is told not to swap bitmaps; SwapBitmaps() follows.
+    SwapBitmaps();
+    heap_->UnBindBitmaps();
+
+    // Remove bitmaps for the immune spaces: unregister each from cc_heap_bitmap_ and delete it.
+    while (!cc_bitmaps_.empty()) {
+      accounting::ContinuousSpaceBitmap* cc_bitmap = cc_bitmaps_.back();
+      cc_heap_bitmap_->RemoveContinuousSpaceBitmap(cc_bitmap);
+      delete cc_bitmap;
+      cc_bitmaps_.pop_back();
+    }
+    region_space_bitmap_ = nullptr;  // NOTE(review): presumably deleted via cc_bitmaps_ - confirm.
+  }
+
+  if (kVerboseMode) {
+    LOG(INFO) << "GC end of ReclaimPhase";
+  }
+}
+
+class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor {  // Adds live bytes per object.
+ public:
+  explicit ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor(ConcurrentCopying* cc)
+      : collector_(cc) {}
+  void operator()(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    DCHECK(ref != nullptr);
+    CHECK(collector_->region_space_bitmap_->Test(ref)) << ref;  // Must be marked.
+    CHECK(collector_->region_space_->IsInUnevacFromSpace(ref)) << ref;  // Unevac from-space only.
+    if (kUseBakerReadBarrier) {
+      CHECK(ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr()) << ref;
+      // Clear the black ptr (back to white).
+      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+    }
+    size_t obj_size = ref->SizeOf();
+    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);  // Aligned size.
+    collector_->region_space_->AddLiveBytes(ref, alloc_size);
+  }
+
+ private:
+  ConcurrentCopying* collector_;
+};
+
+// Compute how many live bytes are left in regions by visiting every object marked in the
+// region space bitmap.
+void ConcurrentCopying::ComputeUnevacFromSpaceLiveRatio() {
+  region_space_->AssertAllRegionLiveBytesZeroOrCleared();
+  const uintptr_t range_begin = reinterpret_cast<uintptr_t>(region_space_->Begin());
+  const uintptr_t range_limit = reinterpret_cast<uintptr_t>(region_space_->Limit());
+  ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor live_bytes_adder(this);
+  region_space_bitmap_->VisitMarkedRange(range_begin, range_limit, live_bytes_adder);
+}
+
+// Assert the to-space invariant for ref, a reference that was read from the field at offset
+// of obj. obj may be null when the holder is unknown (e.g. for roots); it is only used for
+// diagnostics. ref is expected to be non-null. Nothing is checked unless
+// is_asserting_to_space_invariant_ is set.
+//
+// A ref passes if it is in the to-space, is marked in the unevac from-space, or is a marked
+// (or alloc-stack) object in a non-moving space. A from-space ref aborts after logging details
+// about the holder.
+void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
+                                               mirror::Object* ref) {
+  CHECK(heap_->collector_type_ == kCollectorTypeCC) << static_cast<size_t>(heap_->collector_type_);
+  if (is_asserting_to_space_invariant_) {
+    if (region_space_->IsInToSpace(ref)) {
+      // OK.
+      return;
+    } else if (region_space_->IsInUnevacFromSpace(ref)) {
+      // Unevacuated from-space objects stay in place, so they must be marked.
+      CHECK(region_space_bitmap_->Test(ref)) << ref;
+    } else if (region_space_->IsInFromSpace(ref)) {
+      // Not OK. Do extra logging.
+      if (obj != nullptr) {
+        if (kUseBakerReadBarrier) {
+          LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj)
+                    << " holder rb_ptr=" << obj->GetReadBarrierPointer();
+        } else {
+          LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj);
+        }
+        if (region_space_->IsInFromSpace(obj)) {
+          LOG(INFO) << "holder is in the from-space.";
+        } else if (region_space_->IsInToSpace(obj)) {
+          LOG(INFO) << "holder is in the to-space.";
+        } else if (region_space_->IsInUnevacFromSpace(obj)) {
+          LOG(INFO) << "holder is in the unevac from-space.";
+          if (region_space_bitmap_->Test(obj)) {
+            LOG(INFO) << "holder is marked in the region space bitmap.";
+          } else {
+            LOG(INFO) << "holder is not marked in the region space bitmap.";
+          }
+        } else {
+          // In a non-moving space.
+          if (immune_region_.ContainsObject(obj)) {
+            LOG(INFO) << "holder is in the image or the zygote space.";
+            accounting::ContinuousSpaceBitmap* cc_bitmap =
+                cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
+            CHECK(cc_bitmap != nullptr)
+                << "An immune space object must have a bitmap.";
+            if (cc_bitmap->Test(obj)) {
+              LOG(INFO) << "holder is marked in the bit map.";
+            } else {
+              LOG(INFO) << "holder is NOT marked in the bit map.";
+            }
+          } else {
+            LOG(INFO) << "holder is in a non-moving (or main) space.";
+            accounting::ContinuousSpaceBitmap* mark_bitmap =
+                heap_mark_bitmap_->GetContinuousSpaceBitmap(obj);
+            accounting::LargeObjectBitmap* los_bitmap =
+                heap_mark_bitmap_->GetLargeObjectBitmap(obj);
+            CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+            // No continuous space bitmap for the address means it is a large object.
+            bool is_los = mark_bitmap == nullptr;
+            if (!is_los && mark_bitmap->Test(obj)) {
+              LOG(INFO) << "holder is marked in the mark bit map.";
+            } else if (is_los && los_bitmap->Test(obj)) {
+              LOG(INFO) << "holder is marked in the los bit map.";
+            } else {
+              // If ref is on the allocation stack, then it is considered
+              // mark/alive (but not necessarily on the live stack.)
+              if (IsOnAllocStack(obj)) {
+                LOG(INFO) << "holder is on the alloc stack.";
+              } else {
+                LOG(INFO) << "holder is not marked or on the alloc stack.";
+              }
+            }
+          }
+        }
+        LOG(INFO) << "offset=" << offset.SizeValue();
+      }
+      CHECK(false) << "Found from-space ref " << ref << " " << PrettyTypeOf(ref);
+    } else {
+      // In a non-moving spaces. Check that the ref is marked.
+      if (immune_region_.ContainsObject(ref)) {
+        accounting::ContinuousSpaceBitmap* cc_bitmap =
+            cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
+        CHECK(cc_bitmap != nullptr)
+            << "An immune space ref must have a bitmap. " << ref;
+        if (kUseBakerReadBarrier) {
+          // obj may be null (callers pass nullptr when there is no holder), so only read its
+          // rb_ptr when it is present; otherwise the failure message itself would crash.
+          CHECK(cc_bitmap->Test(ref))
+              << "Unmarked immune space ref. obj=" << obj << " rb_ptr="
+              << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr) << " ref=" << ref;
+        } else {
+          CHECK(cc_bitmap->Test(ref))
+              << "Unmarked immune space ref. obj=" << obj << " ref=" << ref;
+        }
+      } else {
+        accounting::ContinuousSpaceBitmap* mark_bitmap =
+            heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
+        accounting::LargeObjectBitmap* los_bitmap =
+            heap_mark_bitmap_->GetLargeObjectBitmap(ref);
+        CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+        // No continuous space bitmap for the address means it is a large object.
+        bool is_los = mark_bitmap == nullptr;
+        if ((!is_los && mark_bitmap->Test(ref)) ||
+            (is_los && los_bitmap->Test(ref))) {
+          // OK.
+        } else {
+          // If ref is on the allocation stack, then it may not be
+          // marked live, but considered marked/alive (but not
+          // necessarily on the live stack).
+          CHECK(IsOnAllocStack(ref)) << "Unmarked ref that's not on the allocation stack. "
+                                     << "obj=" << obj << " ref=" << ref;
+        }
+      }
+    }
+  }
+}
+
+// Root-visiting callback: arg is the ConcurrentCopying collector whose Process() handles root.
+void ConcurrentCopying::ProcessRootCallback(mirror::Object** root, void* arg,
+                                            const RootInfo& /*root_info*/) {
+  ConcurrentCopying* const collector = reinterpret_cast<ConcurrentCopying*>(arg);
+  collector->Process(root);
+}
+
+// Used to scan ref fields of an object: each field is forwarded to the collector's Process().
+class ConcurrentCopyingRefFieldsVisitor {
+ public:
+  explicit ConcurrentCopyingRefFieldsVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
+      const ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    collector_->Process(obj, offset);  // Marks the referenced object and updates the field.
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+    collector_->DelayReferenceReferent(klass, ref);  // The referent is not processed here.
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+};
+
+// Scan ref fields of an object. to_ref must not be a from-space object; every reference
+// field it holds is run through Process() by the field visitor.
+void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+  DCHECK(!region_space_->IsInFromSpace(to_ref));
+  ConcurrentCopyingRefFieldsVisitor field_processor(this);
+  to_ref->VisitReferences<true>(field_processor, field_processor);
+}
+
+// Process a field: mark the object it references and, if Mark() returns a new ref, update it.
+inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) {
+  mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset);
+  if (ref == nullptr || region_space_->IsInToSpace(ref)) {  // Nothing to mark or update.
+    return;
+  }
+  mirror::Object* to_ref = Mark(ref);
+  if (to_ref == ref) {  // Mark() returned the same pointer: the field is already correct.
+    return;
+  }
+  // This may fail if the mutator writes to the field at the same time. But it's ok.
+  mirror::Object* expected_ref = ref;
+  mirror::Object* new_ref = to_ref;
+  do {  // The CAS is weak, so retry until it succeeds or the field no longer holds ref.
+    if (expected_ref !=
+        obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset)) {
+      // It was updated by the mutator.
+      break;
+    }
+  } while (!obj->CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier<false, false, kVerifyNone>(
+      offset, expected_ref, new_ref));
+}
+
+// Process a root: mark the object it references and, if Mark() returns a new ref, update it.
+void ConcurrentCopying::Process(mirror::Object** root) {
+  mirror::Object* ref = *root;
+  if (ref == nullptr || region_space_->IsInToSpace(ref)) {  // Nothing to mark or update.
+    return;
+  }
+  mirror::Object* to_ref = Mark(ref);
+  if (to_ref == ref) {  // Mark() returned the same pointer: the root is already correct.
+    return;
+  }
+  Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+  mirror::Object* expected_ref = ref;
+  mirror::Object* new_ref = to_ref;
+  do {  // The CAS is weak, so retry until it succeeds or the root no longer holds ref.
+    if (expected_ref != addr->LoadRelaxed()) {
+      // It was updated by the mutator.
+      break;
+    }
+  } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+}
+
+// Fill the given memory block with a dummy object (an int array, or a java.lang.Object if the
+// block is too small for one). Used to fill in the copy of an object that lost the race.
+void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size) {
+  CHECK(IsAligned<kObjectAlignment>(byte_size));
+  memset(dummy_obj, 0, byte_size);  // Zero the whole block before writing the header.
+  mirror::Class* int_array_class = mirror::IntArray::GetArrayClass();
+  CHECK(int_array_class != nullptr);
+  AssertToSpaceInvariant(nullptr, MemberOffset(0), int_array_class);
+  size_t component_size = int_array_class->GetComponentSize();
+  CHECK_EQ(component_size, sizeof(int32_t));
+  size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
+  if (data_offset > byte_size) {
+    // Even an empty int array is too big for this block. Use java.lang.Object.
+    mirror::Class* java_lang_Object = WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object);
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object);
+    CHECK_EQ(byte_size, java_lang_Object->GetObjectSize());  // Block must be exactly that size.
+    dummy_obj->SetClass(java_lang_Object);
+    CHECK_EQ(byte_size, dummy_obj->SizeOf());
+  } else {
+    // Use an int array whose elements cover the rest of the block.
+    dummy_obj->SetClass(int_array_class);
+    CHECK(dummy_obj->IsArrayInstance());
+    int32_t length = (byte_size - data_offset) / component_size;  // Element count after header.
+    dummy_obj->AsArray()->SetLength(length);
+    CHECK_EQ(dummy_obj->AsArray()->GetLength(), length)
+        << "byte_size=" << byte_size << " length=" << length
+        << " component_size=" << component_size << " data_offset=" << data_offset;
+    CHECK_EQ(byte_size, dummy_obj->SizeOf())
+        << "byte_size=" << byte_size << " length=" << length
+        << " component_size=" << component_size << " data_offset=" << data_offset;
+  }
+}
+
+// Reuse a memory block that held the copy of an object that lost the race; null if none fits.
+mirror::Object* ConcurrentCopying::AllocateInSkippedBlock(size_t alloc_size) {
+  // Try to reuse the blocks that were unused due to CAS failures.
+  CHECK(IsAligned<space::RegionSpace::kAlignment>(alloc_size));
+  Thread* self = Thread::Current();
+  size_t min_object_size = RoundUp(sizeof(mirror::Object), space::RegionSpace::kAlignment);
+  MutexLock mu(self, skipped_blocks_lock_);  // Guards skipped_blocks_map_ until return.
+  auto it = skipped_blocks_map_.lower_bound(alloc_size);  // Smallest block >= alloc_size.
+  if (it == skipped_blocks_map_.end()) {
+    // Not found.
+    return nullptr;
+  }
+  {
+    size_t byte_size = it->first;
+    CHECK_GE(byte_size, alloc_size);
+    if (byte_size > alloc_size && byte_size - alloc_size < min_object_size) {
+      // If remainder would be too small for a dummy object, retry with a larger request size.
+      it = skipped_blocks_map_.lower_bound(alloc_size + min_object_size);
+      if (it == skipped_blocks_map_.end()) {
+        // Not found.
+        return nullptr;
+      }
+      CHECK(IsAligned<space::RegionSpace::kAlignment>(it->first - alloc_size));
+      CHECK_GE(it->first - alloc_size, min_object_size)
+          << "byte_size=" << byte_size << " it->first=" << it->first << " alloc_size=" << alloc_size;
+    }
+  }
+  // Found a block: either an exact fit or one whose remainder can hold a dummy object.
+  CHECK(it != skipped_blocks_map_.end());
+  size_t byte_size = it->first;
+  uint8_t* addr = it->second;
+  CHECK_GE(byte_size, alloc_size);
+  CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr)));
+  CHECK(IsAligned<space::RegionSpace::kAlignment>(byte_size));
+  if (kVerboseMode) {
+    LOG(INFO) << "Reusing skipped bytes : " << reinterpret_cast<void*>(addr) << ", " << byte_size;
+  }
+  skipped_blocks_map_.erase(it);
+  memset(addr, 0, byte_size);  // Zero the whole block, including any remainder.
+  if (byte_size > alloc_size) {
+    // Return the remainder to the map, formatted as a dummy object.
+    CHECK(IsAligned<space::RegionSpace::kAlignment>(byte_size - alloc_size));
+    CHECK_GE(byte_size - alloc_size, min_object_size);
+    FillWithDummyObject(reinterpret_cast<mirror::Object*>(addr + alloc_size),
+                        byte_size - alloc_size);
+    CHECK(region_space_->IsInToSpace(reinterpret_cast<mirror::Object*>(addr + alloc_size)));
+    skipped_blocks_map_.insert(std::make_pair(byte_size - alloc_size, addr + alloc_size));
+  }
+  return reinterpret_cast<mirror::Object*>(addr);
+}
+
+// Copies the from-space object from_ref and installs a forwarding pointer to the copy in
+// from_ref's lock word. Returns the object that from_ref forwards to: the new copy if this
+// thread installed the forwarding pointer (the copy is then gray under the Baker read barrier
+// and pushed onto the mark stack), or the winner's copy if another thread got there first.
+//
+// Memory for the copy is taken, in order of preference, from the region space, from a
+// previously skipped block, or from the non-moving space (failure there is fatal).
+mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) {
+  DCHECK(region_space_->IsInFromSpace(from_ref));
+  // No read barrier to avoid nested RB that might violate the to-space
+  // invariant. Note that from_ref is a from space ref so the SizeOf()
+  // call will access the from-space meta objects, but it's ok and necessary.
+  size_t obj_size = from_ref->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+  size_t region_space_alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
+  size_t region_space_bytes_allocated = 0U;
+  size_t non_moving_space_bytes_allocated = 0U;
+  size_t bytes_allocated = 0U;
+  mirror::Object* to_ref = region_space_->AllocNonvirtual<true>(
+      region_space_alloc_size, &region_space_bytes_allocated, nullptr);
+  bytes_allocated = region_space_bytes_allocated;
+  if (to_ref != nullptr) {
+    DCHECK_EQ(region_space_alloc_size, region_space_bytes_allocated);
+  }
+  bool fall_back_to_non_moving = false;
+  if (UNLIKELY(to_ref == nullptr)) {
+    // Failed to allocate in the region space. Try the skipped blocks.
+    to_ref = AllocateInSkippedBlock(region_space_alloc_size);
+    if (to_ref != nullptr) {
+      // Succeeded to allocate in a skipped block.
+      if (heap_->use_tlab_) {
+        // This is necessary for the tlab case as it's not accounted in the space.
+        region_space_->RecordAlloc(to_ref);
+      }
+      bytes_allocated = region_space_alloc_size;
+    } else {
+      // Fall back to the non-moving space.
+      fall_back_to_non_moving = true;
+      if (kVerboseMode) {
+        LOG(INFO) << "Out of memory in the to-space. Fall back to non-moving. skipped_bytes="
+                  << to_space_bytes_skipped_.LoadSequentiallyConsistent()
+                  << " skipped_objects=" << to_space_objects_skipped_.LoadSequentiallyConsistent();
+      }
+      to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size,
+                                               &non_moving_space_bytes_allocated, nullptr);
+      CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed";
+      bytes_allocated = non_moving_space_bytes_allocated;
+      // Mark it in the mark bitmap.
+      accounting::ContinuousSpaceBitmap* mark_bitmap =
+          heap_mark_bitmap_->GetContinuousSpaceBitmap(to_ref);
+      CHECK(mark_bitmap != nullptr);
+      CHECK(!mark_bitmap->AtomicTestAndSet(to_ref));
+    }
+  }
+  DCHECK(to_ref != nullptr);
+
+  // Attempt to install the forward pointer. This is in a loop as the
+  // lock word atomic write can fail.
+  while (true) {
+    // Copy the object. TODO: copy only the lockword in the second iteration and on?
+    memcpy(to_ref, from_ref, obj_size);
+    // Set the gray ptr.
+    if (kUseBakerReadBarrier) {
+      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+
+    // The lock word is read from the copy, i.e. it is the value that was just copied out of
+    // from_ref, and it serves as the expected value of the CAS below.
+    LockWord old_lock_word = to_ref->GetLockWord(false);
+
+    if (old_lock_word.GetState() == LockWord::kForwardingAddress) {
+      // Lost the race. Another thread (either GC or mutator) stored
+      // the forwarding pointer first. Make the lost copy (to_ref)
+      // look like a valid but dead (dummy) object and keep it for
+      // future reuse.
+      FillWithDummyObject(to_ref, bytes_allocated);
+      if (!fall_back_to_non_moving) {
+        DCHECK(region_space_->IsInToSpace(to_ref));
+        if (bytes_allocated > space::RegionSpace::kRegionSize) {
+          // Free the large alloc.
+          region_space_->FreeLarge(to_ref, bytes_allocated);
+        } else {
+          // Record the lost copy for later reuse.
+          heap_->num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated);
+          to_space_bytes_skipped_.FetchAndAddSequentiallyConsistent(bytes_allocated);
+          to_space_objects_skipped_.FetchAndAddSequentiallyConsistent(1);
+          MutexLock mu(Thread::Current(), skipped_blocks_lock_);
+          skipped_blocks_map_.insert(std::make_pair(bytes_allocated,
+                                                    reinterpret_cast<uint8_t*>(to_ref)));
+        }
+      } else {
+        DCHECK(heap_->non_moving_space_->HasAddress(to_ref));
+        DCHECK_EQ(bytes_allocated, non_moving_space_bytes_allocated);
+        // Free the non-moving-space chunk.
+        accounting::ContinuousSpaceBitmap* mark_bitmap =
+            heap_mark_bitmap_->GetContinuousSpaceBitmap(to_ref);
+        CHECK(mark_bitmap != nullptr);
+        CHECK(mark_bitmap->Clear(to_ref));
+        heap_->non_moving_space_->Free(Thread::Current(), to_ref);
+      }
+
+      // Get the winner's forward ptr.
+      mirror::Object* lost_fwd_ptr = to_ref;
+      to_ref = reinterpret_cast<mirror::Object*>(old_lock_word.ForwardingAddress());
+      CHECK(to_ref != nullptr);
+      CHECK_NE(to_ref, lost_fwd_ptr);
+      CHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref));
+      CHECK_NE(to_ref->GetLockWord(false).GetState(), LockWord::kForwardingAddress);
+      return to_ref;
+    }
+
+    LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
+
+    // Try to atomically write the fwd ptr.
+    bool success = from_ref->CasLockWordWeakSequentiallyConsistent(old_lock_word, new_lock_word);
+    if (LIKELY(success)) {
+      // The CAS succeeded.
+      objects_moved_.FetchAndAddSequentiallyConsistent(1);
+      bytes_moved_.FetchAndAddSequentiallyConsistent(region_space_alloc_size);
+      if (LIKELY(!fall_back_to_non_moving)) {
+        DCHECK(region_space_->IsInToSpace(to_ref));
+      } else {
+        DCHECK(heap_->non_moving_space_->HasAddress(to_ref));
+        DCHECK_EQ(bytes_allocated, non_moving_space_bytes_allocated);
+      }
+      if (kUseBakerReadBarrier) {
+        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+      }
+      DCHECK(GetFwdPtr(from_ref) == to_ref);
+      CHECK_NE(to_ref->GetLockWord(false).GetState(), LockWord::kForwardingAddress);
+      PushOntoMarkStack<true>(to_ref);
+      return to_ref;
+    } else {
+      // The CAS failed. It may have lost the race or may have failed
+      // due to monitor/hashcode ops. Either way, retry.
+    }
+  }
+}
+
+mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) {
+  DCHECK(from_ref != nullptr);
+  if (region_space_->IsInToSpace(from_ref)) {
+    // Already in the to-space, so it's already marked. Return it as is.
+    return from_ref;
+  }
+  mirror::Object* to_ref;
+  if (region_space_->IsInFromSpace(from_ref)) {
+    to_ref = GetFwdPtr(from_ref);  // May be nullptr, which is returned as "not marked".
+    DCHECK(to_ref == nullptr || region_space_->IsInToSpace(to_ref) ||
+           heap_->non_moving_space_->HasAddress(to_ref))
+        << "from_ref=" << from_ref << " to_ref=" << to_ref;
+  } else if (region_space_->IsInUnevacFromSpace(from_ref)) {
+    if (region_space_bitmap_->Test(from_ref)) {
+      to_ref = from_ref;  // Marked in the region space bitmap.
+    } else {
+      to_ref = nullptr;  // Not marked.
+    }
+  } else {
+    // from_ref is in a non-moving space.
+    if (immune_region_.ContainsObject(from_ref)) {
+      accounting::ContinuousSpaceBitmap* cc_bitmap =
+          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
+      DCHECK(cc_bitmap != nullptr)
+          << "An immune space object must have a bitmap";
+      if (kIsDebugBuild) {
+        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
+            << "Immune space object must be already marked";
+      }
+      if (cc_bitmap->Test(from_ref)) {
+        // Already marked.
+        to_ref = from_ref;
+      } else {
+        // Not marked in the cc bitmap. This query only tests; it never sets the bit.
+        to_ref = nullptr;
+      }
+    } else {
+      // Non-immune non-moving space. Use the mark bitmap.
+      accounting::ContinuousSpaceBitmap* mark_bitmap =
+          heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref);
+      accounting::LargeObjectBitmap* los_bitmap =
+          heap_mark_bitmap_->GetLargeObjectBitmap(from_ref);
+      CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+      bool is_los = mark_bitmap == nullptr;  // No continuous space bitmap: treat as LOS.
+      if (!is_los && mark_bitmap->Test(from_ref)) {
+        // Already marked.
+        to_ref = from_ref;
+      } else if (is_los && los_bitmap->Test(from_ref)) {
+        // Already marked in LOS.
+        to_ref = from_ref;
+      } else {
+        // Not marked in either bitmap. Check the allocation stack.
+        if (IsOnAllocStack(from_ref)) {
+          // If on the allocation stack, it's considered marked.
+          to_ref = from_ref;
+        } else {
+          // Not marked.
+          to_ref = nullptr;
+        }
+      }
+    }
+  }
+  return to_ref;
+}
+
+bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) {
+  QuasiAtomic::ThreadFenceAcquire();
+  accounting::ObjectStack* alloc_stack = GetAllocationStack();
+  mirror::Object** begin = alloc_stack->Begin();
+  // Read End() only once: it may be updated concurrently, which would confuse std::find().
+  mirror::Object** end = alloc_stack->End();
+  return std::find(begin, end, ref) != end;  // Linear scan of [begin, end).
+}
+
+mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
+  if (from_ref == nullptr) {
+    return nullptr;
+  }
+  DCHECK(from_ref != nullptr);  // Redundant with the early return above.
+  DCHECK(heap_->collector_type_ == kCollectorTypeCC);
+  if (region_space_->IsInToSpace(from_ref)) {
+    // Already in the to-space, so it's already marked. Return it as is.
+    return from_ref;
+  }
+  mirror::Object* to_ref;
+  if (region_space_->IsInFromSpace(from_ref)) {
+    to_ref = GetFwdPtr(from_ref);
+    if (kUseBakerReadBarrier) {
+      DCHECK(to_ref != ReadBarrier::GrayPtr()) << "from_ref=" << from_ref << " to_ref=" << to_ref;
+    }
+    if (to_ref == nullptr) {
+      // It isn't marked yet. Mark it by copying it to the to-space.
+      to_ref = Copy(from_ref);
+    }
+    DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
+        << "from_ref=" << from_ref << " to_ref=" << to_ref;
+  } else if (region_space_->IsInUnevacFromSpace(from_ref)) {
+    // Attempt the white-to-gray transition. This may or may not succeed, which is ok.
+    if (kUseBakerReadBarrier) {
+      from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+    }
+    if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
+      // Already marked.
+      to_ref = from_ref;
+    } else {
+      // Newly marked by this call. Push it onto the mark stack.
+      to_ref = from_ref;
+      if (kUseBakerReadBarrier) {
+        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+      }
+      PushOntoMarkStack<true>(to_ref);
+    }
+  } else {
+    // from_ref is in a non-moving space.
+    DCHECK(!region_space_->HasAddress(from_ref)) << from_ref;
+    if (immune_region_.ContainsObject(from_ref)) {
+      accounting::ContinuousSpaceBitmap* cc_bitmap =
+          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
+      DCHECK(cc_bitmap != nullptr)
+          << "An immune space object must have a bitmap";
+      if (kIsDebugBuild) {
+        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
+            << "Immune space object must be already marked";
+      }
+      // Attempt the white-to-gray transition. This may or may not succeed, which is ok.
+      if (kUseBakerReadBarrier) {
+        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+      }
+      if (cc_bitmap->AtomicTestAndSet(from_ref)) {
+        // Already marked.
+        to_ref = from_ref;
+      } else {
+        // Newly marked by this call. Push it onto the mark stack.
+        to_ref = from_ref;
+        if (kUseBakerReadBarrier) {
+          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+        }
+        PushOntoMarkStack<true>(to_ref);
+      }
+    } else {
+      // Non-immune non-moving space. Use the mark bitmap.
+      accounting::ContinuousSpaceBitmap* mark_bitmap =
+          heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref);
+      accounting::LargeObjectBitmap* los_bitmap =
+          heap_mark_bitmap_->GetLargeObjectBitmap(from_ref);
+      CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+      bool is_los = mark_bitmap == nullptr;  // No continuous space bitmap: treat as LOS.
+      if (!is_los && mark_bitmap->Test(from_ref)) {
+        // Already marked.
+        to_ref = from_ref;
+        if (kUseBakerReadBarrier) {
+          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+        }
+      } else if (is_los && los_bitmap->Test(from_ref)) {
+        // Already marked in LOS.
+        to_ref = from_ref;
+        if (kUseBakerReadBarrier) {
+          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+        }
+      } else {
+        // Not marked in either bitmap. Check the allocation stack before marking.
+        if (IsOnAllocStack(from_ref)) {
+          // If it's on the allocation stack, it's considered marked. Keep it white.
+          to_ref = from_ref;
+          // Objects on the allocation stack need not be marked.
+          if (!is_los) {
+            DCHECK(!mark_bitmap->Test(to_ref));
+          } else {
+            DCHECK(!los_bitmap->Test(to_ref));
+          }
+          if (kUseBakerReadBarrier) {
+            DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+          }
+        } else {
+          // Not marked or on the allocation stack. Try to mark it.
+          // This may or may not succeed, which is ok.
+          if (kUseBakerReadBarrier) {
+            from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+          }
+          if (!is_los && mark_bitmap->AtomicTestAndSet(from_ref)) {
+            // Already marked.
+            to_ref = from_ref;
+          } else if (is_los && los_bitmap->AtomicTestAndSet(from_ref)) {
+            // Already marked in LOS.
+            to_ref = from_ref;
+          } else {
+            // Newly marked by this call. Push it onto the mark stack.
+            to_ref = from_ref;
+            if (kUseBakerReadBarrier) {
+              DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+            }
+            PushOntoMarkStack<true>(to_ref);
+          }
+        }
+      }
+    }
+  }
+  return to_ref;
+}
+
+void ConcurrentCopying::FinishPhase() {
+  region_space_ = nullptr;  // Drop the pointer installed via SetRegionSpace().
+  CHECK(mark_queue_.IsEmpty());  // The mark queue must be empty by now.
+  mark_queue_.Clear();  // Reset head/tail and zero the slots.
+  {
+    MutexLock mu(Thread::Current(), skipped_blocks_lock_);
+    skipped_blocks_map_.clear();  // Guarded by skipped_blocks_lock_.
+  }
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  heap_->ClearMarkedObjects();
+}
+
+mirror::Object* ConcurrentCopying::IsMarkedCallback(mirror::Object* from_ref, void* arg) {
+  return reinterpret_cast<ConcurrentCopying*>(arg)->IsMarked(from_ref);  // arg: the collector.
+}
+
+bool ConcurrentCopying::IsHeapReferenceMarkedCallback(
+    mirror::HeapReference<mirror::Object>* field, void* arg) {
+  mirror::Object* from_ref = field->AsMirrorPtr();
+  mirror::Object* to_ref = reinterpret_cast<ConcurrentCopying*>(arg)->IsMarked(from_ref);
+  if (to_ref == nullptr) {
+    return false;  // The referenced object is not marked.
+  }
+  if (from_ref != to_ref) {  // IsMarked() returned a different address: update the field.
+    QuasiAtomic::ThreadFenceRelease();
+    field->Assign(to_ref);
+    QuasiAtomic::ThreadFenceSequentiallyConsistent();
+  }
+  return true;
+}
+
+mirror::Object* ConcurrentCopying::MarkCallback(mirror::Object* from_ref, void* arg) {
+  return reinterpret_cast<ConcurrentCopying*>(arg)->Mark(from_ref);  // arg: the collector.
+}
+
+void ConcurrentCopying::ProcessMarkStackCallback(void* arg) {
+  reinterpret_cast<ConcurrentCopying*>(arg)->ProcessMarkStack();  // arg: the collector.
+}
+
+void ConcurrentCopying::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(  // Forward to the reference processor.
+      klass, reference, &IsHeapReferenceMarkedCallback, this);
+}
+
+void ConcurrentCopying::ProcessReferences(Thread* self, bool concurrent) {
+  TimingLogger::ScopedTiming split("ProcessReferences", GetTimings());
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  GetHeap()->GetReferenceProcessor()->ProcessReferences(  // `this` is passed as the callback arg.
+      concurrent, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
+      &IsHeapReferenceMarkedCallback, &MarkCallback, &ProcessMarkStackCallback, this);
+}
+
+void ConcurrentCopying::RevokeAllThreadLocalBuffers() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  region_space_->RevokeAllThreadLocalBuffers();  // Delegate to the region space.
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index ee5a785..43f520a 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -17,34 +17,268 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_H_
 #define ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_H_
 
+#include "barrier.h"
 #include "garbage_collector.h"
+#include "immune_region.h"
+#include "jni.h"
+#include "object_callbacks.h"
+#include "offsets.h"
+#include "gc/accounting/atomic_stack.h"
+#include "gc/accounting/read_barrier_table.h"
+#include "gc/accounting/space_bitmap.h"
+#include "mirror/object.h"
+#include "mirror/object_reference.h"
+#include "safe_map.h"
+
+#include <unordered_map>
+#include <vector>
 
 namespace art {
+class RootInfo;
+
 namespace gc {
+
+namespace accounting {
+  typedef SpaceBitmap<kObjectAlignment> ContinuousSpaceBitmap;
+  class HeapBitmap;
+}  // namespace accounting
+
+namespace space {
+  class RegionSpace;
+}  // namespace space
+
 namespace collector {
 
+// Bounded concurrent queue over a power-of-two sized ring buffer. Used as the
+// mark stack. TODO: use a concurrent stack for locality.
+class MarkQueue {
+ public:
+  explicit MarkQueue(size_t size) : size_(size) {
+    CHECK(IsPowerOfTwo(size_));  // GetSlotAddr() masks indices with size_ - 1.
+    buf_.reset(new Atomic<mirror::Object*>[size_]);
+    CHECK(buf_.get() != nullptr);
+    Clear();  // Also initializes head_ and tail_.
+  }
+
+  ALWAYS_INLINE Atomic<mirror::Object*>* GetSlotAddr(size_t index) {
+    return &(buf_.get()[index & (size_ - 1)]);
+  }
+
+  // Multiple-producer enqueue. Returns false if the queue is full.
+  bool Enqueue(mirror::Object* to_ref) {
+    size_t t;
+    do {
+      t = tail_.LoadRelaxed();
+      size_t h = head_.LoadSequentiallyConsistent();
+      if (t == h + size_) {
+        // It's full: tail_ is size_ slots ahead of head_.
+        return false;
+      }
+    } while (!tail_.CompareExchangeWeakSequentiallyConsistent(t, t + 1));
+    // We got a slot but its content has not been filled yet at this point.
+    GetSlotAddr(t)->StoreSequentiallyConsistent(to_ref);
+    return true;
+  }
+
+  // Thread-unsafe enqueue. Returns false if the queue is full.
+  bool EnqueueThreadUnsafe(mirror::Object* to_ref) {
+    size_t t = tail_.LoadRelaxed();
+    size_t h = head_.LoadRelaxed();
+    if (t == h + size_) {
+      // It's full: tail_ is size_ slots ahead of head_.
+      return false;
+    }
+    GetSlotAddr(t)->StoreRelaxed(to_ref);
+    tail_.StoreRelaxed(t + 1);
+    return true;
+  }
+
+  // Single-consumer dequeue. Returns nullptr if the queue is empty.
+  mirror::Object* Dequeue() {
+    size_t h = head_.LoadRelaxed();
+    size_t t = tail_.LoadSequentiallyConsistent();
+    if (h == t) {
+      // It's empty.
+      return nullptr;
+    }
+    Atomic<mirror::Object*>* slot = GetSlotAddr(h);
+    mirror::Object* ref = slot->LoadSequentiallyConsistent();
+    while (ref == nullptr) {
+      // Wait until the slot content becomes visible.
+      ref = slot->LoadSequentiallyConsistent();
+    }
+    slot->StoreRelaxed(nullptr);  // Free slots must read as nullptr for the wait loop above.
+    head_.StoreSequentiallyConsistent(h + 1);
+    return ref;
+  }
+
+  bool IsEmpty() {
+    size_t h = head_.LoadSequentiallyConsistent();
+    size_t t = tail_.LoadSequentiallyConsistent();
+    return h == t;
+  }
+
+  void Clear() {
+    head_.StoreRelaxed(0);
+    tail_.StoreRelaxed(0);
+    memset(buf_.get(), 0, size_ * sizeof(Atomic<mirror::Object*>));
+  }
+
+ private:
+  Atomic<size_t> head_;  // Index of the next slot to dequeue.
+  Atomic<size_t> tail_;  // Index of the next slot to enqueue.
+
+  size_t size_;  // Number of slots in buf_; a power of two.
+  std::unique_ptr<Atomic<mirror::Object*>[]> buf_;
+};
+
 class ConcurrentCopying : public GarbageCollector {
  public:
-  explicit ConcurrentCopying(Heap* heap, bool generational = false,
-                             const std::string& name_prefix = "")
-      : GarbageCollector(heap,
-                         name_prefix + (name_prefix.empty() ? "" : " ") +
-                         "concurrent copying + mark sweep") {
-    UNUSED(generational);
-  }
+  // TODO: disable thse flags for production use.
+  // Enable the no-from-space-refs verification at the pause.
+  static constexpr bool kEnableNoFromSpaceRefsVerification = true;
+  // Enable the from-space bytes/objects check.
+  static constexpr bool kEnableFromSpaceAccountingCheck = true;
+  // Enable verbose mode.
+  static constexpr bool kVerboseMode = true;
 
-  ~ConcurrentCopying() {}
+  ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
+  ~ConcurrentCopying();
 
-  virtual void RunPhases() OVERRIDE {}
+  virtual void RunPhases() OVERRIDE;
+  void InitializePhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void MarkingPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ReclaimPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FinishPhase();
+
+  void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
   virtual GcType GetGcType() const OVERRIDE {
     return kGcTypePartial;
   }
   virtual CollectorType GetCollectorType() const OVERRIDE {
     return kCollectorTypeCC;
   }
-  virtual void RevokeAllThreadLocalBuffers() OVERRIDE {}
+  virtual void RevokeAllThreadLocalBuffers() OVERRIDE;
+  void SetRegionSpace(space::RegionSpace* region_space) {
+    DCHECK(region_space != nullptr);
+    region_space_ = region_space;
+  }
+  space::RegionSpace* RegionSpace() {
+    return region_space_;
+  }
+  void AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset, mirror::Object* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsInToSpace(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(ref != nullptr);
+    return IsMarked(ref) == ref;
+  }
+  mirror::Object* Mark(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsMarking() const {
+    return is_marking_;
+  }
+  bool IsActive() const {
+    return is_active_;
+  }
+  Barrier& GetBarrier() {
+    return *gc_barrier_;
+  }
 
  private:
+  mirror::Object* PopOffMarkStack();
+  template<bool kThreadSafe>
+  void PushOntoMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* Copy(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Scan(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Process(mirror::Object* obj, MemberOffset offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Process(mirror::Object** root) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void ProcessRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VerifyNoFromSpaceReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  accounting::ObjectStack* GetAllocationStack();
+  accounting::ObjectStack* GetLiveStack();
+  bool ProcessMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ProcessReferences(Thread* self, bool concurrent)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* IsMarked(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static mirror::Object* MarkCallback(mirror::Object* from_ref, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static mirror::Object* IsMarkedCallback(mirror::Object* from_ref, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static bool IsHeapReferenceMarkedCallback(
+      mirror::HeapReference<mirror::Object>* field, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void ProcessMarkStackCallback(void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SweepSystemWeaks(Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void Sweep(bool swap_bitmaps)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void SweepLargeObjects(bool swap_bitmaps)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void ClearBlackPtrs()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* AllocateInSkippedBlock(size_t alloc_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void CheckEmptyMarkQueue() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void IssueEmptyCheckpoint() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsOnAllocStack(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* GetFwdPtr(mirror::Object* from_ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetFwdPtr(mirror::Object* from_ref, mirror::Object* to_ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FlipThreadRoots() LOCKS_EXCLUDED(Locks::mutator_lock_);;
+  void SwapStacks(Thread* self);
+  void RecordLiveStackFreezeSize(Thread* self);
+  void ComputeUnevacFromSpaceLiveRatio();
+
+  space::RegionSpace* region_space_;      // The underlying region space.
+  std::unique_ptr<Barrier> gc_barrier_;
+  MarkQueue mark_queue_;
+  bool is_marking_;                       // True while marking is ongoing.
+  bool is_active_;                        // True while the collection is ongoing.
+  bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
+  ImmuneRegion immune_region_;
+  std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_;
+  std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_;
+  accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_;
+  // A cache of Heap::GetMarkBitmap().
+  accounting::HeapBitmap* heap_mark_bitmap_;
+  size_t live_stack_freeze_size_;
+  size_t from_space_num_objects_at_first_pause_;
+  size_t from_space_num_bytes_at_first_pause_;
+  Atomic<int> is_mark_queue_push_disallowed_;
+
+  // How many objects and bytes we moved. Used for accounting.
+  Atomic<size_t> bytes_moved_;
+  Atomic<size_t> objects_moved_;
+
+  // The skipped blocks are memory blocks/chucks that were copies of
+  // objects that were unused due to lost races (cas failures) at
+  // object copy/forward pointer install. They are reused.
+  Mutex skipped_blocks_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::multimap<size_t, uint8_t*> skipped_blocks_map_ GUARDED_BY(skipped_blocks_lock_);
+  Atomic<size_t> to_space_bytes_skipped_;
+  Atomic<size_t> to_space_objects_skipped_;
+
+  accounting::ReadBarrierTable* rb_table_;
+  bool force_evacuate_all_;  // True if all regions are evacuated.
+
+  friend class ConcurrentCopyingRefFieldsVisitor;
+  friend class ConcurrentCopyingImmuneSpaceObjVisitor;
+  friend class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor;
+  friend class ConcurrentCopyingVerifyNoFromSpaceRefsObjectVisitor;
+  friend class ConcurrentCopyingClearBlackPtrsVisitor;
+  friend class ConcurrentCopyingLostCopyVisitor;
+  friend class ThreadFlipVisitor;
+  friend class FlipCallback;
+  friend class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor;
+
   DISALLOW_COPY_AND_ASSIGN(ConcurrentCopying);
 };
 
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index 277525e..30144f0 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -57,6 +57,13 @@
     UpdateSize();
   }
 
+  mirror::Object* Begin() const {  // Read-only accessor for begin_.
+    return begin_;
+  }
+  mirror::Object* End() const {  // Read-only accessor for end_.
+    return end_;
+  }
+
  private:
   bool IsEmpty() const {
     return size_ == 0;
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 9d2f6d1..b8c2452 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -25,6 +25,7 @@
 #include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/region_space-inl.h"
 #include "gc/space/rosalloc_space-inl.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
@@ -66,11 +67,12 @@
   size_t bytes_allocated;
   size_t usable_size;
   size_t new_num_bytes_allocated = 0;
-  if (allocator == kAllocatorTypeTLAB) {
+  if (allocator == kAllocatorTypeTLAB || allocator == kAllocatorTypeRegionTLAB) {
     byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
   }
   // If we have a thread local allocation we don't need to update bytes allocated.
-  if (allocator == kAllocatorTypeTLAB && byte_count <= self->TlabSize()) {
+  if ((allocator == kAllocatorTypeTLAB || allocator == kAllocatorTypeRegionTLAB) &&
+      byte_count <= self->TlabSize()) {
     obj = self->AllocTlab(byte_count);
     DCHECK(obj != nullptr) << "AllocTlab can't fail";
     obj->SetClass(klass);
@@ -195,7 +197,7 @@
 inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
                                            size_t alloc_size, size_t* bytes_allocated,
                                            size_t* usable_size) {
-  if (allocator_type != kAllocatorTypeTLAB &&
+  if (allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegionTLAB &&
       UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
   }
@@ -265,6 +267,55 @@
       *usable_size = alloc_size;
       break;
     }
+    case kAllocatorTypeRegion: {
+      DCHECK(region_space_ != nullptr);
+      alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
+      ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
+      break;
+    }
+    case kAllocatorTypeRegionTLAB: {
+      DCHECK(region_space_ != nullptr);
+      DCHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
+      if (UNLIKELY(self->TlabSize() < alloc_size)) {
+        if (space::RegionSpace::kRegionSize >= alloc_size) {
+          // Non-large. Check OOME for a tlab.
+          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, space::RegionSpace::kRegionSize))) {
+            // Try to allocate a tlab.
+            if (!region_space_->AllocNewTlab(self)) {
+              // Failed to allocate a tlab. Try non-tlab.
+              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
+              return ret;
+            }
+            *bytes_allocated = space::RegionSpace::kRegionSize;
+            // Fall-through.
+          } else {
+            // Check OOME for a non-tlab allocation.
+            if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) {
+              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
+              return ret;
+            } else {
+              // Neither tlab nor non-tlab works. Give up.
+              return nullptr;
+            }
+          }
+        } else {
+          // Large. Check OOME.
+          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
+            ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size);
+            return ret;
+          } else {
+            return nullptr;
+          }
+        }
+      } else {
+        *bytes_allocated = 0;
+      }
+      // The allocation can't fail.
+      ret = self->AllocTlab(alloc_size);
+      DCHECK(ret != nullptr);
+      *usable_size = alloc_size;
+      break;
+    }
     default: {
       LOG(FATAL) << "Invalid allocator type";
       ret = nullptr;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6ba30c6..ba06e05 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -49,6 +49,7 @@
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/region_space.h"
 #include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
 #include "gc/space/zygote_space.h"
@@ -176,6 +177,7 @@
       current_non_moving_allocator_(kAllocatorTypeNonMoving),
       bump_pointer_space_(nullptr),
       temp_space_(nullptr),
+      region_space_(nullptr),
       min_free_(min_free),
       max_free_(max_free),
       target_utilization_(target_utilization),
@@ -211,6 +213,12 @@
   mark_bitmap_.reset(new accounting::HeapBitmap(this));
   // Requested begin for the alloc space, to follow the mapped image and oat files
   uint8_t* requested_alloc_space_begin = nullptr;
+  if (foreground_collector_type_ == kCollectorTypeCC) {
+    // Need to use a low address so that we can allocate a contiguous
+    // 2 * Xmx space when there's no image (dex2oat for target).
+    CHECK_GE(300 * MB, non_moving_space_capacity);
+    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(300 * MB) - non_moving_space_capacity;
+  }
   if (!image_file_name.empty()) {
     std::string error_msg;
     space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str(),
@@ -241,8 +249,9 @@
                                      +-main alloc space2 / bump space 2 (capacity_)+-
                                      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
   */
-  // We don't have hspace compaction enabled with GSS.
-  if (foreground_collector_type_ == kCollectorTypeGSS) {
+  // We don't have hspace compaction enabled with GSS or CC.
+  if (foreground_collector_type_ == kCollectorTypeGSS ||
+      foreground_collector_type_ == kCollectorTypeCC) {
     use_homogeneous_space_compaction_for_oom_ = false;
   }
   bool support_homogeneous_space_compaction =
@@ -280,10 +289,12 @@
     // Try to reserve virtual memory at a lower address if we have a separate non moving space.
     request_begin = reinterpret_cast<uint8_t*>(300 * MB);
   }
-  // Attempt to create 2 mem maps at or after the requested begin.
-  main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0], request_begin, capacity_,
-                                                    &error_str));
-  CHECK(main_mem_map_1.get() != nullptr) << error_str;
+  if (foreground_collector_type_ != kCollectorTypeCC) {
+    // Attempt to create 2 mem maps at or after the requested begin.
+    main_mem_map_1.reset(MapAnonymousPreferredAddress(kMemMapSpaceName[0], request_begin, capacity_,
+                                                      &error_str));
+    CHECK(main_mem_map_1.get() != nullptr) << error_str;
+  }
   if (support_homogeneous_space_compaction ||
       background_collector_type_ == kCollectorTypeSS ||
       foreground_collector_type_ == kCollectorTypeSS) {
@@ -305,7 +316,10 @@
     AddSpace(non_moving_space_);
   }
   // Create other spaces based on whether or not we have a moving GC.
-  if (IsMovingGc(foreground_collector_type_) && foreground_collector_type_ != kCollectorTypeGSS) {
+  if (foreground_collector_type_ == kCollectorTypeCC) {
+    region_space_ = space::RegionSpace::Create("Region space", capacity_ * 2, request_begin);
+    AddSpace(region_space_);
+  } else if (IsMovingGc(foreground_collector_type_) && foreground_collector_type_ != kCollectorTypeGSS) {
     // Create bump pointer spaces.
     // We only to create the bump pointer if the foreground collector is a compacting GC.
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
@@ -379,6 +393,12 @@
   // Allocate the card table.
   card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity));
   CHECK(card_table_.get() != NULL) << "Failed to create card table";
+
+  if (foreground_collector_type_ == kCollectorTypeCC && kUseTableLookupReadBarrier) {
+    rb_table_.reset(new accounting::ReadBarrierTable());
+    DCHECK(rb_table_->IsAllCleared());
+  }
+
   // Card cache for now since it makes it easier for us to update the references to the copying
   // spaces.
   accounting::ModUnionTable* mod_union_table =
@@ -703,29 +723,64 @@
   }
 }
 
+// Visit objects when threads aren't suspended. If a concurrent moving GC is
+// in use, disable moving GC, suspend all threads, and then visit objects.
 void Heap::VisitObjects(ObjectCallback callback, void* arg) {
   Thread* self = Thread::Current();
-  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-    // Threads are already suspended.
-    VisitObjectsInternal(callback, arg);
-  } else if (IsGcConcurrent() && IsMovingGc(collector_type_)) {
-    // Concurrent moving GC. Suspend all threads and visit objects.
-    DCHECK_EQ(collector_type_, foreground_collector_type_);
-    DCHECK_EQ(foreground_collector_type_, background_collector_type_)
-        << "Assume no transition such that collector_type_ won't change";
+  Locks::mutator_lock_->AssertSharedHeld(self);
+  DCHECK(!Locks::mutator_lock_->IsExclusiveHeld(self)) << "Call VisitObjectsPaused() instead";
+  if (IsGcConcurrentAndMoving()) {
+    // Concurrent moving GC. Just suspending threads isn't sufficient
+    // because a collection isn't one big pause and we could suspend
+    // threads in the middle (between phases) of a concurrent moving
+    // collection where it's not easily known which objects are alive
+    // (both the region space and the non-moving space) or which
+    // copies of objects to visit, and the to-space invariant could be
+    // easily broken. Visit objects while the GC isn't running (by using
+    // IncrementDisableMovingGC()) and while threads are suspended.
+    IncrementDisableMovingGC(self);
     self->TransitionFromRunnableToSuspended(kWaitingForVisitObjects);
     ThreadList* tl = Runtime::Current()->GetThreadList();
     tl->SuspendAll();
+    VisitObjectsInternalRegionSpace(callback, arg);
     VisitObjectsInternal(callback, arg);
     tl->ResumeAll();
     self->TransitionFromSuspendedToRunnable();
+    DecrementDisableMovingGC(self);
   } else {
     // GCs can move objects, so don't allow this.
     ScopedAssertNoThreadSuspension ants(self, "Visiting objects");
+    DCHECK(region_space_ == nullptr);
     VisitObjectsInternal(callback, arg);
   }
 }
 
+// Visit objects when threads are already suspended.
+void Heap::VisitObjectsPaused(ObjectCallback callback, void* arg) {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  VisitObjectsInternalRegionSpace(callback, arg);
+  VisitObjectsInternal(callback, arg);
+}
+
+// Visit objects in the region space.
+void Heap::VisitObjectsInternalRegionSpace(ObjectCallback callback, void* arg) {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  if (region_space_ != nullptr) {
+    DCHECK(IsGcConcurrentAndMoving());
+    if (!zygote_creation_lock_.IsExclusiveHeld(self)) {
+      // Exclude the pre-zygote fork time where the semi-space collector
+      // calls VerifyHeapReferences() as part of the zygote compaction
+      // which then would call here without the moving GC disabled,
+      // which is fine.
+      DCHECK(IsMovingGCDisabled(self));
+    }
+    region_space_->Walk(callback, arg);
+  }
+}
+
+// Visit objects in the other spaces.
 void Heap::VisitObjectsInternal(ObjectCallback callback, void* arg) {
   if (bump_pointer_space_ != nullptr) {
     // Visit objects in bump pointer space.
@@ -956,6 +1011,9 @@
     } else if (allocator_type == kAllocatorTypeBumpPointer ||
                allocator_type == kAllocatorTypeTLAB) {
       space = bump_pointer_space_;
+    } else if (allocator_type == kAllocatorTypeRegion ||
+               allocator_type == kAllocatorTypeRegionTLAB) {
+      space = region_space_;
     }
     if (space != nullptr) {
       space->LogFragmentationAllocFailure(oss, byte_count);
@@ -1062,6 +1120,9 @@
   if (bump_pointer_space_ != nullptr) {
     total_alloc_space_allocated -= bump_pointer_space_->Size();
   }
+  if (region_space_ != nullptr) {
+    total_alloc_space_allocated -= region_space_->GetBytesAllocated();
+  }
   const float managed_utilization = static_cast<float>(total_alloc_space_allocated) /
       static_cast<float>(total_alloc_space_size);
   uint64_t gc_heap_end_ns = NanoTime();
@@ -1134,6 +1195,9 @@
     // a GC). When a GC isn't running End() - Begin() is 0 which means no objects are contained.
     return temp_space_->Contains(obj);
   }
+  if (region_space_ != nullptr && region_space_->HasAddress(obj)) {
+    return true;
+  }
   space::ContinuousSpace* c_space = FindContinuousSpaceFromObject(obj, true);
   space::DiscontinuousSpace* d_space = nullptr;
   if (c_space != nullptr) {
@@ -1780,7 +1844,15 @@
     collector_type_ = collector_type;
     gc_plan_.clear();
     switch (collector_type_) {
-      case kCollectorTypeCC:  // Fall-through.
+      case kCollectorTypeCC: {
+        gc_plan_.push_back(collector::kGcTypeFull);
+        if (use_tlab_) {
+          ChangeAllocator(kAllocatorTypeRegionTLAB);
+        } else {
+          ChangeAllocator(kAllocatorTypeRegion);
+        }
+        break;
+      }
       case kCollectorTypeMC:  // Fall-through.
       case kCollectorTypeSS:  // Fall-through.
       case kCollectorTypeGSS: {
@@ -1963,7 +2035,11 @@
     // Compact the bump pointer space to a new zygote bump pointer space.
     bool reset_main_space = false;
     if (IsMovingGc(collector_type_)) {
-      zygote_collector.SetFromSpace(bump_pointer_space_);
+      if (collector_type_ == kCollectorTypeCC) {
+        zygote_collector.SetFromSpace(region_space_);
+      } else {
+        zygote_collector.SetFromSpace(bump_pointer_space_);
+      }
     } else {
       CHECK(main_space_ != nullptr);
       // Copy from the main space.
@@ -1984,7 +2060,11 @@
       delete old_main_space;
       AddSpace(main_space_);
     } else {
-      bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      if (collector_type_ == kCollectorTypeCC) {
+        region_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      } else {
+        bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      }
     }
     if (temp_space_ != nullptr) {
       CHECK(temp_space_->IsEmpty());
@@ -2154,7 +2234,9 @@
   // TODO: Clean this up.
   if (compacting_gc) {
     DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
-           current_allocator_ == kAllocatorTypeTLAB);
+           current_allocator_ == kAllocatorTypeTLAB ||
+           current_allocator_ == kAllocatorTypeRegion ||
+           current_allocator_ == kAllocatorTypeRegionTLAB);
     switch (collector_type_) {
       case kCollectorTypeSS:
         // Fall-through.
@@ -2165,6 +2247,7 @@
         collector = semi_space_collector_;
         break;
       case kCollectorTypeCC:
+        concurrent_copying_collector_->SetRegionSpace(region_space_);
         collector = concurrent_copying_collector_;
         break;
       case kCollectorTypeMC:
@@ -2174,7 +2257,7 @@
       default:
         LOG(FATAL) << "Invalid collector type " << static_cast<size_t>(collector_type_);
     }
-    if (collector != mark_compact_collector_) {
+    if (collector != mark_compact_collector_ && collector != concurrent_copying_collector_) {
       temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
       CHECK(temp_space_->IsEmpty());
     }
@@ -2491,7 +2574,7 @@
   // 2. Allocated during the GC (pre sweep GC verification).
   // We don't want to verify the objects in the live stack since they themselves may be
   // pointing to dead objects if they are not reachable.
-  VisitObjects(VerifyObjectVisitor::VisitCallback, &visitor);
+  VisitObjectsPaused(VerifyObjectVisitor::VisitCallback, &visitor);
   // Verify the roots:
   Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRootCallback, &visitor);
   if (visitor.GetFailureCount() > 0) {
@@ -2633,7 +2716,7 @@
 
 void Heap::RevokeAllThreadLocalAllocationStacks(Thread* self) {
   // This must be called only during the pause.
-  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self));
+  DCHECK(Locks::mutator_lock_->IsExclusiveHeld(self));
   MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   MutexLock mu2(self, *Locks::thread_list_lock_);
   std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
@@ -3175,6 +3258,9 @@
   if (bump_pointer_space_ != nullptr) {
     bump_pointer_space_->RevokeThreadLocalBuffers(thread);
   }
+  if (region_space_ != nullptr) {
+    region_space_->RevokeThreadLocalBuffers(thread);
+  }
 }
 
 void Heap::RevokeRosAllocThreadLocalBuffers(Thread* thread) {
@@ -3190,6 +3276,9 @@
   if (bump_pointer_space_ != nullptr) {
     bump_pointer_space_->RevokeAllThreadLocalBuffers();
   }
+  if (region_space_ != nullptr) {
+    region_space_->RevokeAllThreadLocalBuffers();
+  }
 }
 
 bool Heap::IsGCRequestPending() const {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 36a3767..b0b53b0 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -27,6 +27,7 @@
 #include "base/timing_logger.h"
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table.h"
+#include "gc/accounting/read_barrier_table.h"
 #include "gc/gc_cause.h"
 #include "gc/collector/garbage_collector.h"
 #include "gc/collector/gc_type.h"
@@ -86,6 +87,7 @@
   class ImageSpace;
   class LargeObjectSpace;
   class MallocSpace;
+  class RegionSpace;
   class RosAllocSpace;
   class Space;
   class SpaceTest;
@@ -218,8 +220,8 @@
   void VisitObjects(ObjectCallback callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  void VisitObjectsInternal(ObjectCallback callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  void VisitObjectsPaused(ObjectCallback callback, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
@@ -410,6 +412,10 @@
     return card_table_.get();
   }
 
+  accounting::ReadBarrierTable* GetReadBarrierTable() const {
+    return rb_table_.get();
+  }
+
   void AddFinalizerReference(Thread* self, mirror::Object** object);
 
   // Returns the number of bytes currently allocated.
@@ -623,6 +629,30 @@
     return zygote_space_ != nullptr;
   }
 
+  collector::ConcurrentCopying* ConcurrentCopyingCollector() {
+    return concurrent_copying_collector_;
+  }
+
+  CollectorType CurrentCollectorType() {
+    return collector_type_;
+  }
+
+  bool IsGcConcurrentAndMoving() const {
+    if (IsGcConcurrent() && IsMovingGc(collector_type_)) {
+      // Assume no transition when a concurrent moving collector is used.
+      DCHECK_EQ(collector_type_, foreground_collector_type_);
+      DCHECK_EQ(foreground_collector_type_, background_collector_type_)
+          << "Assume no transition such that collector_type_ won't change";
+      return true;
+    }
+    return false;
+  }
+
+  bool IsMovingGCDisabled(Thread* self) {
+    MutexLock mu(self, *gc_complete_lock_);
+    return disable_moving_gc_count_ > 0;
+  }
+
   // Request an asynchronous trim.
   void RequestTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_);
 
@@ -654,10 +684,14 @@
   static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
     return
         allocator_type != kAllocatorTypeBumpPointer &&
-        allocator_type != kAllocatorTypeTLAB;
+        allocator_type != kAllocatorTypeTLAB &&
+        allocator_type != kAllocatorTypeRegion &&
+        allocator_type != kAllocatorTypeRegionTLAB;
   }
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
-    return AllocatorHasAllocationStack(allocator_type);
+    return
+        allocator_type != kAllocatorTypeBumpPointer &&
+        allocator_type != kAllocatorTypeTLAB;
   }
   static bool IsMovingGc(CollectorType collector_type) {
     return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
@@ -813,6 +847,13 @@
   // Trim 0 pages at the end of reference tables.
   void TrimIndirectReferenceTables(Thread* self);
 
+  void VisitObjectsInternal(ObjectCallback callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void VisitObjectsInternalRegionSpace(ObjectCallback callback, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;
 
@@ -842,6 +883,8 @@
   // The card table, dirtied by the write barrier.
   std::unique_ptr<accounting::CardTable> card_table_;
 
+  std::unique_ptr<accounting::ReadBarrierTable> rb_table_;
+
   // A mod-union table remembers all of the references from the it's space to other spaces.
   AllocationTrackingSafeMap<space::Space*, accounting::ModUnionTable*, kAllocatorTagHeap>
       mod_union_tables_;
@@ -1020,6 +1063,8 @@
   // Temp space is the space which the semispace collector copies to.
   space::BumpPointerSpace* temp_space_;
 
+  space::RegionSpace* region_space_;
+
   // Minimum free guarantees that you always have at least min_free_ free bytes after growing for
   // utilization, regardless of target utilization ratio.
   size_t min_free_;
@@ -1088,6 +1133,7 @@
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
+  friend class collector::ConcurrentCopying;
   friend class collector::MarkSweep;
   friend class collector::SemiSpace;
   friend class ReferenceQueue;
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 5eb095b..c67fd98 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -53,7 +53,7 @@
   // The slow path bool is contained in the reference class object, can only be set once
   // Only allow setting this with mutators suspended so that we can avoid using a lock in the
   // GetReferent fast path as an optimization.
-  void EnableSlowPath() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnableSlowPath() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Decode the referent, may block if references are being processed.
   mirror::Object* GetReferent(Thread* self, mirror::Reference* reference)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::reference_processor_lock_);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index f4efe3c..7be0704 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -17,6 +17,7 @@
 #include "reference_queue.h"
 
 #include "accounting/card_table-inl.h"
+#include "collector/concurrent_copying.h"
 #include "heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -85,6 +86,24 @@
   } else {
     ref->SetPendingNext<false>(nullptr);
   }
+  Heap* heap = Runtime::Current()->GetHeap();
+  if (kUseBakerOrBrooksReadBarrier && heap->CurrentCollectorType() == kCollectorTypeCC &&
+      heap->ConcurrentCopyingCollector()->IsActive()) {
+    // Clear the gray ptr we left in ConcurrentCopying::ProcessMarkStack().
+    // We don't want to do this when the zygote compaction collector (SemiSpace) is running.
+    CHECK(ref != nullptr);
+    CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr())
+        << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
+    if (heap->ConcurrentCopyingCollector()->RegionSpace()->IsInToSpace(ref)) {
+      // Moving objects.
+      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+    } else {
+      // Non-moving objects.
+      ref->SetReadBarrierPointer(ReadBarrier::BlackPtr());
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
+    }
+  }
   return ref;
 }
 
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 04b09e9..9675ba6 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -57,7 +57,7 @@
                                  kGcRetentionPolicyAlwaysCollect),
       growth_end_(mem_map->End()),
       objects_allocated_(0), bytes_allocated_(0),
-      block_lock_("Block lock"),
+      block_lock_("Block lock", kBumpPointerSpaceBlockLock),
       main_block_size_(0),
       num_blocks_(0) {
 }
@@ -172,7 +172,8 @@
   // Walk all of the objects in the main block first.
   while (pos < main_end) {
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
-    if (obj->GetClass() == nullptr) {
+    // No read barrier because obj may not be a valid object.
+    if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() == nullptr) {
       // There is a race condition where a thread has just allocated an object but not set the
       // class. We can't know the size of this object, so we don't visit it and exit the function
       // since there is guaranteed to be not other blocks.
@@ -192,7 +193,8 @@
     CHECK_LE(reinterpret_cast<const uint8_t*>(end_obj), End());
     // We don't know how many objects are allocated in the current block. When we hit a null class
     // assume its the end. TODO: Have a thread update the header when it flushes the block?
-    while (obj < end_obj && obj->GetClass() != nullptr) {
+    // No read barrier because obj may not be a valid object.
+    while (obj < end_obj && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
       callback(obj, arg);
       obj = GetNextObject(obj);
     }
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
new file mode 100644
index 0000000..fd00739
--- /dev/null
+++ b/runtime/gc/space/region_space-inl.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_
+
+#include "region_space.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* RegionSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated,
+                                          size_t* usable_size) {
+  num_bytes = RoundUp(num_bytes, kAlignment);
+  return AllocNonvirtual<false>(num_bytes, bytes_allocated, usable_size);
+}
+
+inline mirror::Object* RegionSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes,
+                                                      size_t* bytes_allocated,
+                                                      size_t* usable_size) {
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  return Alloc(self, num_bytes, bytes_allocated, usable_size);
+}
+
+template<bool kForEvac>
+inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated,
+                                                    size_t* usable_size) {
+  DCHECK(IsAligned<kAlignment>(num_bytes));
+  mirror::Object* obj;
+  if (LIKELY(num_bytes <= kRegionSize)) {
+    // Non-large object.
+    if (!kForEvac) {
+      obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size);
+    } else {
+      DCHECK(evac_region_ != nullptr);
+      obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size);
+    }
+    if (LIKELY(obj != nullptr)) {
+      return obj;
+    }
+    MutexLock mu(Thread::Current(), region_lock_);
+    // Retry with current region since another thread may have updated it.
+    if (!kForEvac) {
+      obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size);
+    } else {
+      obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size);
+    }
+    if (LIKELY(obj != nullptr)) {
+      return obj;
+    }
+    if (!kForEvac) {
+      // Retain sufficient free regions for full evacuation.
+      if ((num_non_free_regions_ + 1) * 2 > num_regions_) {
+        return nullptr;
+      }
+      for (size_t i = 0; i < num_regions_; ++i) {
+        Region* r = &regions_[i];
+        if (r->IsFree()) {
+          r->Unfree(time_);
+          r->SetNewlyAllocated();
+          ++num_non_free_regions_;
+          obj = r->Alloc(num_bytes, bytes_allocated, usable_size);
+          CHECK(obj != nullptr);
+          current_region_ = r;
+          return obj;
+        }
+      }
+    } else {
+      for (size_t i = 0; i < num_regions_; ++i) {
+        Region* r = &regions_[i];
+        if (r->IsFree()) {
+          r->Unfree(time_);
+          ++num_non_free_regions_;
+          obj = r->Alloc(num_bytes, bytes_allocated, usable_size);
+          CHECK(obj != nullptr);
+          evac_region_ = r;
+          return obj;
+        }
+      }
+    }
+  } else {
+    // Large object.
+    obj = AllocLarge<kForEvac>(num_bytes, bytes_allocated, usable_size);
+    if (LIKELY(obj != nullptr)) {
+      return obj;
+    }
+  }
+  return nullptr;
+}
+
+inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* bytes_allocated,
+                                                  size_t* usable_size) {
+  DCHECK_EQ(state_, static_cast<uint8_t>(kRegionToSpace));
+  DCHECK(IsAligned<kAlignment>(num_bytes));
+  Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_);
+  uint8_t* old_top;
+  uint8_t* new_top;
+  do {
+    old_top = atomic_top->LoadRelaxed();
+    new_top = old_top + num_bytes;
+    if (UNLIKELY(new_top > end_)) {
+      return nullptr;
+    }
+  } while (!atomic_top->CompareExchangeWeakSequentiallyConsistent(old_top, new_top));
+  reinterpret_cast<Atomic<uint64_t>*>(&objects_allocated_)->FetchAndAddSequentiallyConsistent(1);
+  DCHECK_LE(atomic_top->LoadRelaxed(), end_);
+  DCHECK_LT(old_top, end_);
+  DCHECK_LE(new_top, end_);
+  *bytes_allocated = num_bytes;
+  if (usable_size != nullptr) {
+    *usable_size = num_bytes;
+  }
+  return reinterpret_cast<mirror::Object*>(old_top);
+}
+
+inline size_t RegionSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t num_bytes = obj->SizeOf();
+  if (usable_size != nullptr) {
+    if (LIKELY(num_bytes <= kRegionSize)) {
+      DCHECK(RefToRegion(obj)->IsNormal());
+      *usable_size = RoundUp(num_bytes, kAlignment);
+    } else {
+      DCHECK(RefToRegion(obj)->IsLarge());
+      *usable_size = RoundUp(num_bytes, kRegionSize);
+    }
+  }
+  return num_bytes;
+}
+
+template<RegionSpace::SubSpaceType kSubSpaceType>
+uint64_t RegionSpace::GetBytesAllocatedInternal() {
+  uint64_t bytes = 0;
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsFree()) {
+      continue;
+    }
+    switch (kSubSpaceType) {
+      case kAllSpaces:
+        bytes += r->BytesAllocated();
+        break;
+      case kFromSpace:
+        if (r->IsInFromSpace()) {
+          bytes += r->BytesAllocated();
+        }
+        break;
+      case kUnevacFromSpace:
+        if (r->IsInUnevacFromSpace()) {
+          bytes += r->BytesAllocated();
+        }
+        break;
+      case kToSpace:
+        if (r->IsInToSpace()) {
+          bytes += r->BytesAllocated();
+        }
+        break;
+      default:
+        LOG(FATAL) << "Unexpected space type : " << static_cast<int>(kSubSpaceType);
+    }
+  }
+  return bytes;
+}
+
+template<RegionSpace::SubSpaceType kSubSpaceType>
+uint64_t RegionSpace::GetObjectsAllocatedInternal() {
+  uint64_t bytes = 0;
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsFree()) {
+      continue;
+    }
+    switch (kSubSpaceType) {
+      case kAllSpaces:
+        bytes += r->ObjectsAllocated();
+        break;
+      case kFromSpace:
+        if (r->IsInFromSpace()) {
+          bytes += r->ObjectsAllocated();
+        }
+        break;
+      case kUnevacFromSpace:
+        if (r->IsInUnevacFromSpace()) {
+          bytes += r->ObjectsAllocated();
+        }
+        break;
+      case kToSpace:
+        if (r->IsInToSpace()) {
+          bytes += r->ObjectsAllocated();
+        }
+        break;
+      default:
+        LOG(FATAL) << "Unexpected space type : " << static_cast<int>(kSubSpaceType);
+    }
+  }
+  return bytes;
+}
+
+template<bool kToSpaceOnly>
+void RegionSpace::WalkInternal(ObjectCallback* callback, void* arg) {
+  // TODO: MutexLock on region_lock_ won't work due to lock order
+  // issues (the classloader classes lock and the monitor lock). We
+  // call this with threads suspended.
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsFree() || (kToSpaceOnly && !r->IsInToSpace())) {
+      continue;
+    }
+    if (r->IsLarge()) {
+      mirror::Object* obj = reinterpret_cast<mirror::Object*>(r->Begin());
+      if (obj->GetClass() != nullptr) {
+        callback(obj, arg);
+      }
+    } else if (r->IsLargeTail()) {
+      // Do nothing.
+    } else {
+      uint8_t* pos = r->Begin();
+      uint8_t* top = r->Top();
+      while (pos < top) {
+        mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
+        if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
+          callback(obj, arg);
+          pos = reinterpret_cast<uint8_t*>(GetNextObject(obj));
+        } else {
+          break;
+        }
+      }
+    }
+  }
+}
+
+inline mirror::Object* RegionSpace::GetNextObject(mirror::Object* obj) {
+  const uintptr_t position = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf();
+  return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment));
+}
+
+template<bool kForEvac>
+mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocated,
+                                        size_t* usable_size) {
+  DCHECK(IsAligned<kAlignment>(num_bytes));
+  DCHECK_GT(num_bytes, kRegionSize);
+  size_t num_regs = RoundUp(num_bytes, kRegionSize) / kRegionSize;
+  DCHECK_GT(num_regs, 0U);
+  DCHECK_LT((num_regs - 1) * kRegionSize, num_bytes);
+  DCHECK_LE(num_bytes, num_regs * kRegionSize);
+  MutexLock mu(Thread::Current(), region_lock_);
+  if (!kForEvac) {
+    // Retain sufficient free regions for full evacuation.
+    if ((num_non_free_regions_ + num_regs) * 2 > num_regions_) {
+      return nullptr;
+    }
+  }
+  // Find a large enough run of contiguous free regions.
+  size_t left = 0;
+  while (left + num_regs - 1 < num_regions_) {
+    bool found = true;
+    size_t right = left;
+    DCHECK_LT(right, left + num_regs)
+        << "The inner loop Should iterate at least once";
+    while (right < left + num_regs) {
+      if (regions_[right].IsFree()) {
+        ++right;
+      } else {
+        found = false;
+        break;
+      }
+    }
+    if (found) {
+      // right points to the one region past the last free region.
+      DCHECK_EQ(left + num_regs, right);
+      Region* first_reg = &regions_[left];
+      DCHECK(first_reg->IsFree());
+      first_reg->UnfreeLarge(time_);
+      ++num_non_free_regions_;
+      first_reg->SetTop(first_reg->Begin() + num_bytes);
+      for (size_t p = left + 1; p < right; ++p) {
+        DCHECK_LT(p, num_regions_);
+        DCHECK(regions_[p].IsFree());
+        regions_[p].UnfreeLargeTail(time_);
+        ++num_non_free_regions_;
+      }
+      *bytes_allocated = num_bytes;
+      if (usable_size != nullptr) {
+        *usable_size = num_regs * kRegionSize;
+      }
+      return reinterpret_cast<mirror::Object*>(first_reg->Begin());
+    } else {
+      // right points to the non-free region. Start with the one after it.
+      left = right + 1;
+    }
+  }
+  return nullptr;
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
new file mode 100644
index 0000000..2ecb79e
--- /dev/null
+++ b/runtime/gc/space/region_space.cc
@@ -0,0 +1,412 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bump_pointer_space.h"
+#include "bump_pointer_space-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/class-inl.h"
+#include "thread_list.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+// If a region has live objects whose size is less than this percent
+// value of the region size, evacuate the region.
+static constexpr uint kEvaculateLivePercentThreshold = 75U;
+
+RegionSpace* RegionSpace::Create(const std::string& name, size_t capacity,
+                                 uint8_t* requested_begin) {
+  capacity = RoundUp(capacity, kRegionSize);
+  std::string error_msg;
+  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
+                                                       PROT_READ | PROT_WRITE, true, &error_msg));
+  if (mem_map.get() == nullptr) {
+    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
+        << PrettySize(capacity) << " with message " << error_msg;
+    MemMap::DumpMaps(LOG(ERROR));
+    return nullptr;
+  }
+  return new RegionSpace(name, mem_map.release());
+}
+
+RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map)
+    : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                                 kGcRetentionPolicyAlwaysCollect),
+      region_lock_("Region lock", kRegionSpaceRegionLock), time_(1U) {
+  size_t mem_map_size = mem_map->Size();
+  CHECK_ALIGNED(mem_map_size, kRegionSize);
+  CHECK_ALIGNED(mem_map->Begin(), kRegionSize);
+  num_regions_ = mem_map_size / kRegionSize;
+  num_non_free_regions_ = 0U;
+  DCHECK_GT(num_regions_, 0U);
+  regions_.reset(new Region[num_regions_]);
+  uint8_t* region_addr = mem_map->Begin();
+  for (size_t i = 0; i < num_regions_; ++i, region_addr += kRegionSize) {
+    regions_[i] = Region(i, region_addr, region_addr + kRegionSize);
+  }
+  if (kIsDebugBuild) {
+    CHECK_EQ(regions_[0].Begin(), Begin());
+    for (size_t i = 0; i < num_regions_; ++i) {
+      CHECK(regions_[i].IsFree());
+      CHECK_EQ(static_cast<size_t>(regions_[i].End() - regions_[i].Begin()), kRegionSize);
+      if (i + 1 < num_regions_) {
+        CHECK_EQ(regions_[i].End(), regions_[i + 1].Begin());
+      }
+    }
+    CHECK_EQ(regions_[num_regions_ - 1].End(), Limit());
+  }
+  full_region_ = Region();
+  DCHECK(!full_region_.IsFree());
+  DCHECK(full_region_.IsNormal());
+  current_region_ = &full_region_;
+  evac_region_ = nullptr;
+  size_t ignored;
+  DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr) == nullptr);
+}
+
+size_t RegionSpace::FromSpaceSize() {
+  uint64_t num_regions = 0;
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsInFromSpace()) {
+      ++num_regions;
+    }
+  }
+  return num_regions * kRegionSize;
+}
+
+size_t RegionSpace::UnevacFromSpaceSize() {
+  uint64_t num_regions = 0;
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsInUnevacFromSpace()) {
+      ++num_regions;
+    }
+  }
+  return num_regions * kRegionSize;
+}
+
+size_t RegionSpace::ToSpaceSize() {
+  uint64_t num_regions = 0;
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsInToSpace()) {
+      ++num_regions;
+    }
+  }
+  return num_regions * kRegionSize;
+}
+
+inline bool RegionSpace::Region::ShouldBeEvacuated() {
+  DCHECK(state_ == kRegionToSpace || state_ == kRegionLargeToSpace);
+  // if the region was allocated after the start of the
+  // previous GC or the live ratio is below threshold, evacuate
+  // it.
+  bool result;
+  if (is_newly_allocated_) {
+    result = true;
+  } else {
+    bool is_live_percent_valid = live_bytes_ != static_cast<size_t>(-1);
+    if (is_live_percent_valid) {
+      uint live_percent = GetLivePercent();
+      if (state_ == kRegionToSpace) {
+        // Side note: live_percent == 0 does not necessarily mean
+        // there are no live objects due to rounding (there may be a
+        // few).
+        result = live_percent < kEvaculateLivePercentThreshold;
+      } else {
+        DCHECK(state_ == kRegionLargeToSpace);
+        result = live_percent == 0U;
+      }
+    } else {
+      result = false;
+    }
+  }
+  return result;
+}
+
+// Determine which regions to evacuate and mark them as
+// from-space. Mark the rest as unevacuated from-space.
+void RegionSpace::SetFromSpace(accounting::ReadBarrierTable* rb_table, bool force_evacuate_all) {
+  ++time_;
+  if (kUseTableLookupReadBarrier) {
+    DCHECK(rb_table->IsAllCleared());
+    rb_table->SetAll();
+  }
+  MutexLock mu(Thread::Current(), region_lock_);
+  size_t num_expected_large_tails = 0;
+  bool prev_large_evacuated = false;
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    RegionState state = static_cast<RegionState>(r->state_);
+    if (!r->IsFree()) {
+      DCHECK(r->IsInToSpace());
+      if (LIKELY(num_expected_large_tails == 0U)) {
+        DCHECK(state == kRegionToSpace || state == kRegionLargeToSpace);
+        bool should_evacuate = force_evacuate_all || r->ShouldBeEvacuated();
+        if (should_evacuate) {
+          r->SetAsFromSpace();
+          DCHECK(r->IsInFromSpace());
+        } else {
+          r->SetAsUnevacFromSpace();
+          DCHECK(r->IsInUnevacFromSpace());
+        }
+        if (UNLIKELY(state == kRegionLargeToSpace)) {
+          prev_large_evacuated = should_evacuate;
+          num_expected_large_tails = RoundUp(r->BytesAllocated(), kRegionSize) / kRegionSize - 1;
+          DCHECK_GT(num_expected_large_tails, 0U);
+        }
+      } else {
+        DCHECK(state == kRegionLargeTailToSpace);
+        if (prev_large_evacuated) {
+          r->SetAsFromSpace();
+          DCHECK(r->IsInFromSpace());
+        } else {
+          r->SetAsUnevacFromSpace();
+          DCHECK(r->IsInUnevacFromSpace());
+        }
+        --num_expected_large_tails;
+      }
+    } else {
+      DCHECK_EQ(num_expected_large_tails, 0U);
+      if (kUseTableLookupReadBarrier) {
+        // Clear the rb table for to-space regions.
+        rb_table->Clear(r->Begin(), r->End());
+      }
+    }
+  }
+  current_region_ = &full_region_;
+  evac_region_ = &full_region_;
+}
+
+void RegionSpace::ClearFromSpace() {
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsInFromSpace()) {
+      r->Clear();
+      --num_non_free_regions_;
+    } else if (r->IsInUnevacFromSpace()) {
+      r->SetUnevacFromSpaceAsToSpace();
+    }
+  }
+  evac_region_ = nullptr;
+}
+
+void RegionSpace::AssertAllRegionLiveBytesZeroOrCleared() {
+  if (kIsDebugBuild) {
+    MutexLock mu(Thread::Current(), region_lock_);
+    for (size_t i = 0; i < num_regions_; ++i) {
+      Region* r = &regions_[i];
+      size_t live_bytes = r->LiveBytes();
+      CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
+    }
+  }
+}
+
+// Appends a description of the fragmentation failure to |os|: the largest
+// contiguous allocation that could currently succeed, taken as the larger of
+// the space left in the current region and the longest run of free regions.
+// failed_alloc_bytes is unused; the caller is expected to print it.
+void RegionSpace::LogFragmentationAllocFailure(std::ostream& os,
+                                               size_t /* failed_alloc_bytes */) {
+  size_t max_contiguous_allocation = 0;
+  MutexLock mu(Thread::Current(), region_lock_);
+  // Space remaining in the region currently being allocated into.
+  if (current_region_->End() - current_region_->Top() > 0) {
+    max_contiguous_allocation = current_region_->End() - current_region_->Top();
+  }
+  if (num_non_free_regions_ * 2 < num_regions_) {
+    // We reserve half of the regions for evacuation only. If we
+    // occupy more than half the regions, do not report the free
+    // regions as available.
+    size_t max_contiguous_free_regions = 0;
+    size_t num_contiguous_free_regions = 0;
+    for (size_t i = 0; i < num_regions_; ++i) {
+      if (regions_[i].IsFree()) {
+        // Extend the current run and fold it into the maximum immediately.
+        // Updating the maximum only when a non-free region ends the run
+        // would miss a run that extends to the last region of the space.
+        ++num_contiguous_free_regions;
+        max_contiguous_free_regions = std::max(max_contiguous_free_regions,
+                                               num_contiguous_free_regions);
+      } else {
+        // A non-free region terminates the current run.
+        num_contiguous_free_regions = 0U;
+      }
+    }
+    // Convert the longest run of free regions into bytes.
+    max_contiguous_allocation = std::max(max_contiguous_allocation,
+                                         max_contiguous_free_regions * kRegionSize);
+  }
+  os << "; failed due to fragmentation (largest possible contiguous allocation "
+     <<  max_contiguous_allocation << " bytes)";
+  // Caller's job to print failed_alloc_bytes.
+}
+
+void RegionSpace::Clear() {
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (!r->IsFree()) {
+      --num_non_free_regions_;
+    }
+    r->Clear();
+  }
+  current_region_ = &full_region_;
+  evac_region_ = &full_region_;
+}
+
+void RegionSpace::Dump(std::ostream& os) const {
+  os << GetName() << " "
+      << reinterpret_cast<void*>(Begin()) << "-" << reinterpret_cast<void*>(Limit());
+}
+
+void RegionSpace::FreeLarge(mirror::Object* large_obj, size_t bytes_allocated) {
+  DCHECK(Contains(large_obj));
+  DCHECK(IsAligned<kRegionSize>(large_obj));
+  MutexLock mu(Thread::Current(), region_lock_);
+  uint8_t* begin_addr = reinterpret_cast<uint8_t*>(large_obj);
+  uint8_t* end_addr = AlignUp(reinterpret_cast<uint8_t*>(large_obj) + bytes_allocated, kRegionSize);
+  CHECK_LT(begin_addr, end_addr);
+  for (uint8_t* addr = begin_addr; addr < end_addr; addr += kRegionSize) {
+    Region* reg = RefToRegionLocked(reinterpret_cast<mirror::Object*>(addr));
+    if (addr == begin_addr) {
+      DCHECK(reg->IsLarge());
+    } else {
+      DCHECK(reg->IsLargeTail());
+    }
+    reg->Clear();
+    --num_non_free_regions_;
+  }
+  if (end_addr < Limit()) {
+    // If we aren't at the end of the space, check that the next region is not a large tail.
+    Region* following_reg = RefToRegionLocked(reinterpret_cast<mirror::Object*>(end_addr));
+    DCHECK(!following_reg->IsLargeTail());
+  }
+}
+
+void RegionSpace::DumpRegions(std::ostream& os) {
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    regions_[i].Dump(os);
+  }
+}
+
+void RegionSpace::DumpNonFreeRegions(std::ostream& os) {
+  MutexLock mu(Thread::Current(), region_lock_);
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* reg = &regions_[i];
+    if (!reg->IsFree()) {
+      reg->Dump(os);
+    }
+  }
+}
+
+void RegionSpace::RecordAlloc(mirror::Object* ref) {
+  CHECK(ref != nullptr);
+  Region* r = RefToRegion(ref);
+  reinterpret_cast<Atomic<uint64_t>*>(&r->objects_allocated_)->FetchAndAddSequentiallyConsistent(1);
+}
+
+bool RegionSpace::AllocNewTlab(Thread* self) {
+  MutexLock mu(self, region_lock_);
+  RevokeThreadLocalBuffersLocked(self);
+  // Retain sufficient free regions for full evacuation.
+  if ((num_non_free_regions_ + 1) * 2 > num_regions_) {
+    return false;
+  }
+  for (size_t i = 0; i < num_regions_; ++i) {
+    Region* r = &regions_[i];
+    if (r->IsFree()) {
+      r->Unfree(time_);
+      ++num_non_free_regions_;
+      // TODO: this is buggy. Debug it.
+      // r->SetNewlyAllocated();
+      r->SetTop(r->End());
+      r->is_a_tlab_ = true;
+      r->thread_ = self;
+      self->SetTlab(r->Begin(), r->End());
+      return true;
+    }
+  }
+  return false;
+}
+
+void RegionSpace::RevokeThreadLocalBuffers(Thread* thread) {
+  MutexLock mu(Thread::Current(), region_lock_);
+  RevokeThreadLocalBuffersLocked(thread);
+}
+
+void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) {
+  uint8_t* tlab_start = thread->GetTlabStart();
+  DCHECK_EQ(thread->HasTlab(), tlab_start != nullptr);
+  if (tlab_start != nullptr) {
+    DCHECK(IsAligned<kRegionSize>(tlab_start));
+    Region* r = RefToRegionLocked(reinterpret_cast<mirror::Object*>(tlab_start));
+    DCHECK(r->IsNormal());
+    DCHECK_EQ(thread->GetThreadLocalBytesAllocated(), kRegionSize);
+    r->RecordThreadLocalAllocations(thread->GetThreadLocalObjectsAllocated(),
+                                    thread->GetThreadLocalBytesAllocated());
+    r->is_a_tlab_ = false;
+    r->thread_ = nullptr;
+  }
+  thread->SetTlab(nullptr, nullptr);
+}
+
+void RegionSpace::RevokeAllThreadLocalBuffers() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+  MutexLock mu2(self, *Locks::thread_list_lock_);
+  std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+  for (Thread* thread : thread_list) {
+    RevokeThreadLocalBuffers(thread);
+  }
+}
+
+void RegionSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) {
+  if (kIsDebugBuild) {
+    DCHECK(!thread->HasTlab());
+  }
+}
+
+void RegionSpace::AssertAllThreadLocalBuffersAreRevoked() {
+  if (kIsDebugBuild) {
+    Thread* self = Thread::Current();
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
+    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+    for (Thread* thread : thread_list) {
+      AssertThreadLocalBuffersAreRevoked(thread);
+    }
+  }
+}
+
+void RegionSpace::Region::Dump(std::ostream& os) const {
+  os << "Region[" << idx_ << "]=" << reinterpret_cast<void*>(begin_) << "-" << reinterpret_cast<void*>(top_)
+     << "-" << reinterpret_cast<void*>(end_)
+     << " state=" << static_cast<uint>(state_) << " objects_allocated=" << objects_allocated_
+     << " alloc_time=" << alloc_time_ << " live_bytes=" << live_bytes_
+     << " is_newly_allocated=" << is_newly_allocated_ << " is_a_tlab=" << is_a_tlab_ << " thread=" << thread_ << "\n";
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
new file mode 100644
index 0000000..b4a043f
--- /dev/null
+++ b/runtime/gc/space/region_space.h
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_REGION_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_REGION_SPACE_H_
+
+#include "object_callbacks.h"
+#include "space.h"
+#include "gc/accounting/read_barrier_table.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+// A space that consists of equal-sized regions.
+class RegionSpace FINAL : public ContinuousMemMapAllocSpace {
+ public:
+  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
+
+  SpaceType GetType() const OVERRIDE {
+    return kSpaceTypeRegionSpace;
+  }
+
+  // Create a region space with the requested capacity. The requested base address is not
+  // guaranteed to be granted; if it is required, the caller should call Begin on the returned
+  // space to confirm the request was granted.
+  static RegionSpace* Create(const std::string& name, size_t capacity, uint8_t* requested_begin);
+
+  // Allocate num_bytes, returns nullptr if the space is full.
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size) OVERRIDE;
+  // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector.
+  mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                    size_t* usable_size)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // The main allocation routine.
+  template<bool kForEvac>
+  ALWAYS_INLINE mirror::Object* AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated,
+                                                size_t* usable_size);
+  // Allocate/free large objects (objects that are larger than the region size).
+  template<bool kForEvac>
+  mirror::Object* AllocLarge(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size);
+  void FreeLarge(mirror::Object* large_obj, size_t bytes_allocated);
+
+  // Return the storage space required by obj.
+  size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocationSizeNonvirtual(obj, usable_size);
+  }
+  size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  size_t Free(Thread*, mirror::Object*) OVERRIDE {
+    UNIMPLEMENTED(FATAL);
+    return 0;
+  }
+  size_t FreeList(Thread*, size_t, mirror::Object**) OVERRIDE {
+    UNIMPLEMENTED(FATAL);
+    return 0;
+  }
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
+    // No live bitmap.
+    return nullptr;
+  }
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
+    // No mark bitmap.
+    return nullptr;
+  }
+
+  void Clear() OVERRIDE LOCKS_EXCLUDED(region_lock_);
+
+  void Dump(std::ostream& os) const;
+  void DumpRegions(std::ostream& os);
+  void DumpNonFreeRegions(std::ostream& os);
+
+  void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(region_lock_);
+  void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(region_lock_);
+  void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
+                                                    Locks::thread_list_lock_);
+  void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(region_lock_);
+  void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
+                                                              Locks::thread_list_lock_);
+
+  enum SubSpaceType {
+    kAllSpaces,        // All spaces.
+    kFromSpace,        // From-space. To be evacuated.
+    kUnevacFromSpace,  // Unevacuated from-space. Not to be evacuated.
+    kToSpace,          // To-space.
+  };
+
+  template<SubSpaceType kSubSpaceType> uint64_t GetBytesAllocatedInternal();
+  template<SubSpaceType kSubSpaceType> uint64_t GetObjectsAllocatedInternal();
+  uint64_t GetBytesAllocated() {
+    return GetBytesAllocatedInternal<kAllSpaces>();
+  }
+  uint64_t GetObjectsAllocated() {
+    return GetObjectsAllocatedInternal<kAllSpaces>();
+  }
+  uint64_t GetBytesAllocatedInFromSpace() {
+    return GetBytesAllocatedInternal<kFromSpace>();
+  }
+  uint64_t GetObjectsAllocatedInFromSpace() {
+    return GetObjectsAllocatedInternal<kFromSpace>();
+  }
+  uint64_t GetBytesAllocatedInUnevacFromSpace() {
+    return GetBytesAllocatedInternal<kUnevacFromSpace>();
+  }
+  uint64_t GetObjectsAllocatedInUnevacFromSpace() {
+    return GetObjectsAllocatedInternal<kUnevacFromSpace>();
+  }
+
+  bool CanMoveObjects() const OVERRIDE {
+    return true;
+  }
+
+  bool Contains(const mirror::Object* obj) const {
+    const uint8_t* byte_obj = reinterpret_cast<const uint8_t*>(obj);
+    return byte_obj >= Begin() && byte_obj < Limit();
+  }
+
+  RegionSpace* AsRegionSpace() OVERRIDE {
+    return this;
+  }
+
+  // Go through all of the blocks and visit the continuous objects.
+  void Walk(ObjectCallback* callback, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    WalkInternal<false>(callback, arg);
+  }
+
+  void WalkToSpace(ObjectCallback* callback, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    WalkInternal<true>(callback, arg);
+  }
+
+  accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE {
+    return nullptr;
+  }
+  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Object alignment within the space.
+  static constexpr size_t kAlignment = kObjectAlignment;
+  // The region size.
+  static constexpr size_t kRegionSize = 1 * MB;
+
+  bool IsInFromSpace(mirror::Object* ref) {
+    if (HasAddress(ref)) {
+      Region* r = RefToRegionUnlocked(ref);
+      return r->IsInFromSpace();
+    }
+    return false;
+  }
+
+  bool IsInUnevacFromSpace(mirror::Object* ref) {
+    if (HasAddress(ref)) {
+      Region* r = RefToRegionUnlocked(ref);
+      return r->IsInUnevacFromSpace();
+    }
+    return false;
+  }
+
+  bool IsInToSpace(mirror::Object* ref) {
+    if (HasAddress(ref)) {
+      Region* r = RefToRegionUnlocked(ref);
+      return r->IsInToSpace();
+    }
+    return false;
+  }
+
+  void SetFromSpace(accounting::ReadBarrierTable* rb_table, bool force_evacuate_all)
+      LOCKS_EXCLUDED(region_lock_);
+
+  size_t FromSpaceSize();
+  size_t UnevacFromSpaceSize();
+  size_t ToSpaceSize();
+  void ClearFromSpace();
+
+  void AddLiveBytes(mirror::Object* ref, size_t alloc_size) {
+    Region* reg = RefToRegion(ref);
+    reg->AddLiveBytes(alloc_size);
+  }
+
+  void AssertAllRegionLiveBytesZeroOrCleared();
+
+  void RecordAlloc(mirror::Object* ref);
+  bool AllocNewTlab(Thread* self);
+
+  uint32_t Time() {
+    return time_;
+  }
+
+ private:
+  RegionSpace(const std::string& name, MemMap* mem_map);
+
+  template<bool kToSpaceOnly>
+  void WalkInternal(ObjectCallback* callback, void* arg) NO_THREAD_SAFETY_ANALYSIS;
+
+  enum RegionState {
+    kRegionFree,                      // Free region.
+    kRegionToSpace,                   // To-space region.
+    kRegionFromSpace,                 // From-space region. To be evacuated.
+    kRegionUnevacFromSpace,           // Unevacuated from-space region. Not to be evacuated.
+    kRegionLargeToSpace,              // Large (allocation larger than the region size) to-space.
+    kRegionLargeFromSpace,            // Large from-space. To be evacuated.
+    kRegionLargeUnevacFromSpace,      // Large unevacuated from-space.
+    kRegionLargeTailToSpace,          // Large tail (non-first regions of a large allocation).
+    kRegionLargeTailFromSpace,        // Large tail from-space.
+    kRegionLargeTailUnevacFromSpace,  // Large tail unevacuated from-space.
+  };
+
+  class Region {
+   public:
+    Region()
+        : idx_(static_cast<size_t>(-1)),
+          begin_(nullptr), top_(nullptr), end_(nullptr), state_(kRegionToSpace),
+          objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)),
+          is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) {}
+
+    Region(size_t idx, uint8_t* begin, uint8_t* end)
+        : idx_(idx), begin_(begin), top_(begin), end_(end), state_(kRegionFree),
+          objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)),
+          is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) {
+      DCHECK_LT(begin, end);
+      DCHECK_EQ(static_cast<size_t>(end - begin), kRegionSize);
+    }
+
+    void Clear() {
+      top_ = begin_;
+      state_ = kRegionFree;
+      objects_allocated_ = 0;
+      alloc_time_ = 0;
+      live_bytes_ = static_cast<size_t>(-1);
+      if (!kMadviseZeroes) {
+        memset(begin_, 0, end_ - begin_);
+      }
+      madvise(begin_, end_ - begin_, MADV_DONTNEED);
+      is_newly_allocated_ = false;
+      is_a_tlab_ = false;
+      thread_ = nullptr;
+    }
+
+    ALWAYS_INLINE mirror::Object* Alloc(size_t num_bytes, size_t* bytes_allocated,
+                                        size_t* usable_size);
+
+    bool IsFree() const {
+      bool is_free = state_ == kRegionFree;
+      if (is_free) {
+        DCHECK_EQ(begin_, top_);
+        DCHECK_EQ(objects_allocated_, 0U);
+      }
+      return is_free;
+    }
+
+    // Given a free region, declare it non-free (allocated).
+    void Unfree(uint32_t alloc_time) {
+      DCHECK(IsFree());
+      state_ = kRegionToSpace;
+      alloc_time_ = alloc_time;
+    }
+
+    void UnfreeLarge(uint32_t alloc_time) {
+      DCHECK(IsFree());
+      state_ = kRegionLargeToSpace;
+      alloc_time_ = alloc_time;
+    }
+
+    void UnfreeLargeTail(uint32_t alloc_time) {
+      DCHECK(IsFree());
+      state_ = kRegionLargeTailToSpace;
+      alloc_time_ = alloc_time;
+    }
+
+    void SetNewlyAllocated() {
+      is_newly_allocated_ = true;
+    }
+
+    // Non-large, non-large-tail.
+    bool IsNormal() const {
+      return state_ == kRegionToSpace || state_ == kRegionFromSpace ||
+          state_ == kRegionUnevacFromSpace;
+    }
+
+    bool IsLarge() const {
+      bool is_large = state_ == kRegionLargeToSpace || state_ == kRegionLargeFromSpace ||
+          state_ == kRegionLargeUnevacFromSpace;
+      if (is_large) {
+        DCHECK_LT(begin_ + 1 * MB, top_);
+      }
+      return is_large;
+    }
+
+    bool IsLargeTail() const {
+      bool is_large_tail = state_ == kRegionLargeTailToSpace ||
+          state_ == kRegionLargeTailFromSpace ||
+          state_ == kRegionLargeTailUnevacFromSpace;
+      if (is_large_tail) {
+        DCHECK_EQ(begin_, top_);
+      }
+      return is_large_tail;
+    }
+
+    size_t Idx() const {
+      return idx_;
+    }
+
+    bool IsInFromSpace() const {
+      return state_ == kRegionFromSpace || state_ == kRegionLargeFromSpace ||
+          state_ == kRegionLargeTailFromSpace;
+    }
+
+    bool IsInToSpace() const {
+      return state_ == kRegionToSpace || state_ == kRegionLargeToSpace ||
+          state_ == kRegionLargeTailToSpace;
+    }
+
+    bool IsInUnevacFromSpace() const {
+      return state_ == kRegionUnevacFromSpace || state_ == kRegionLargeUnevacFromSpace ||
+          state_ == kRegionLargeTailUnevacFromSpace;
+    }
+
+    void SetAsFromSpace() {
+      switch (state_) {
+        case kRegionToSpace:
+          state_ = kRegionFromSpace;
+          break;
+        case kRegionLargeToSpace:
+          state_ = kRegionLargeFromSpace;
+          break;
+        case kRegionLargeTailToSpace:
+          state_ = kRegionLargeTailFromSpace;
+          break;
+        default:
+          LOG(FATAL) << "Unexpected region state : " << static_cast<uint>(state_)
+                     << " idx=" << idx_;
+      }
+      live_bytes_ = static_cast<size_t>(-1);
+    }
+
+    void SetAsUnevacFromSpace() {
+      switch (state_) {
+        case kRegionToSpace:
+          state_ = kRegionUnevacFromSpace;
+          break;
+        case kRegionLargeToSpace:
+          state_ = kRegionLargeUnevacFromSpace;
+          break;
+        case kRegionLargeTailToSpace:
+          state_ = kRegionLargeTailUnevacFromSpace;
+          break;
+        default:
+          LOG(FATAL) << "Unexpected region state : " << static_cast<uint>(state_)
+                     << " idx=" << idx_;
+      }
+      live_bytes_ = 0U;
+    }
+
+    void SetUnevacFromSpaceAsToSpace() {
+      switch (state_) {
+        case kRegionUnevacFromSpace:
+          state_ = kRegionToSpace;
+          break;
+        case kRegionLargeUnevacFromSpace:
+          state_ = kRegionLargeToSpace;
+          break;
+        case kRegionLargeTailUnevacFromSpace:
+          state_ = kRegionLargeTailToSpace;
+          break;
+        default:
+          LOG(FATAL) << "Unexpected region state : " << static_cast<uint>(state_)
+                     << " idx=" << idx_;
+      }
+    }
+
+    ALWAYS_INLINE bool ShouldBeEvacuated();
+
+    void AddLiveBytes(size_t live_bytes) {
+      DCHECK(IsInUnevacFromSpace());
+      DCHECK(!IsLargeTail());
+      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
+      live_bytes_ += live_bytes;
+      DCHECK_LE(live_bytes_, BytesAllocated());
+    }
+
+    size_t LiveBytes() const {
+      return live_bytes_;
+    }
+
+    uint GetLivePercent() const {
+      DCHECK(IsInToSpace());
+      DCHECK(!IsLargeTail());
+      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
+      DCHECK_LE(live_bytes_, BytesAllocated());
+      size_t bytes_allocated = RoundUp(BytesAllocated(), kRegionSize);
+      DCHECK_GE(bytes_allocated, 0U);
+      uint result = (live_bytes_ * 100U) / bytes_allocated;
+      DCHECK_LE(result, 100U);
+      return result;
+    }
+
+    size_t BytesAllocated() const {
+      if (IsLarge()) {
+        DCHECK_LT(begin_ + kRegionSize, top_);
+        return static_cast<size_t>(top_ - begin_);
+      } else if (IsLargeTail()) {
+        DCHECK_EQ(begin_, top_);
+        return 0;
+      } else {
+        DCHECK(IsNormal()) << static_cast<uint>(state_);
+        DCHECK_LE(begin_, top_);
+        size_t bytes = static_cast<size_t>(top_ - begin_);
+        DCHECK_LE(bytes, kRegionSize);
+        return bytes;
+      }
+    }
+
+    size_t ObjectsAllocated() const {
+      if (IsLarge()) {
+        DCHECK_LT(begin_ + 1 * MB, top_);
+        DCHECK_EQ(objects_allocated_, 0U);
+        return 1;
+      } else if (IsLargeTail()) {
+        DCHECK_EQ(begin_, top_);
+        DCHECK_EQ(objects_allocated_, 0U);
+        return 0;
+      } else {
+        DCHECK(IsNormal()) << static_cast<uint>(state_);
+        return objects_allocated_;
+      }
+    }
+
+    uint8_t* Begin() const {
+      return begin_;
+    }
+
+    uint8_t* Top() const {
+      return top_;
+    }
+
+    void SetTop(uint8_t* new_top) {
+      top_ = new_top;
+    }
+
+    uint8_t* End() const {
+      return end_;
+    }
+
+    bool Contains(mirror::Object* ref) const {
+      return begin_ <= reinterpret_cast<uint8_t*>(ref) && reinterpret_cast<uint8_t*>(ref) < end_;
+    }
+
+    void Dump(std::ostream& os) const;
+
+    void RecordThreadLocalAllocations(size_t num_objects, size_t num_bytes) {
+      DCHECK(IsNormal());
+      DCHECK_EQ(objects_allocated_, 0U);
+      DCHECK_EQ(top_, end_);
+      objects_allocated_ = num_objects;
+      top_ = begin_ + num_bytes;
+      DCHECK_EQ(top_, end_);
+    }
+
+   private:
+    size_t idx_;                   // The region's index in the region space.
+    uint8_t* begin_;               // The begin address of the region.
+    // Can't use Atomic<uint8_t*> as Atomic's copy operator is implicitly deleted.
+    uint8_t* top_;                 // The current position of the allocation.
+    uint8_t* end_;                 // The end address of the region.
+    uint8_t state_;                // The region state (see RegionState).
+    uint64_t objects_allocated_;   // The number of objects allocated.
+    uint32_t alloc_time_;          // The allocation time of the region.
+    size_t live_bytes_;            // The live bytes. Used to compute the live percent.
+    bool is_newly_allocated_;      // True if it's allocated after the last collection.
+    bool is_a_tlab_;               // True if it's a tlab.
+    Thread* thread_;               // The owning thread if it's a tlab.
+
+    friend class RegionSpace;
+  };
+
+  Region* RefToRegion(mirror::Object* ref) LOCKS_EXCLUDED(region_lock_) {
+    MutexLock mu(Thread::Current(), region_lock_);
+    return RefToRegionLocked(ref);
+  }
+
+  Region* RefToRegionUnlocked(mirror::Object* ref) NO_THREAD_SAFETY_ANALYSIS {
+    // For performance reasons (this is frequently called via
+    // IsInFromSpace() etc.) we avoid taking a lock here. Note that
+    // since we change a region from to-space to from-space only
+    // during a pause (SetFromSpace()) and from from-space to free
+    // only after GC is done, as long as ref is a valid reference into
+    // an allocated region, it's safe to access the region state
+    // without the lock.
+    return RefToRegionLocked(ref);
+  }
+
+  Region* RefToRegionLocked(mirror::Object* ref) EXCLUSIVE_LOCKS_REQUIRED(region_lock_) {
+    DCHECK(HasAddress(ref));
+    uintptr_t offset = reinterpret_cast<uintptr_t>(ref) - reinterpret_cast<uintptr_t>(Begin());
+    size_t reg_idx = offset / kRegionSize;
+    DCHECK_LT(reg_idx, num_regions_);
+    Region* reg = &regions_[reg_idx];
+    DCHECK_EQ(reg->Idx(), reg_idx);
+    DCHECK(reg->Contains(ref));
+    return reg;
+  }
+
+  mirror::Object* GetNextObject(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  Mutex region_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  uint32_t time_;                  // The time as the number of collections since the startup.
+  size_t num_regions_;             // The number of regions in this space.
+  size_t num_non_free_regions_;    // The number of non-free regions in this space.
+  std::unique_ptr<Region[]> regions_ GUARDED_BY(region_lock_);
+                                   // The pointer to the region array.
+  Region* current_region_;         // The region that's being allocated currently.
+  Region* evac_region_;            // The region that's being evacuated to currently.
+  Region full_region_;             // The dummy/sentinel region that looks full.
+
+  DISALLOW_COPY_AND_ASSIGN(RegionSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_REGION_SPACE_H_
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index 486d79a..a2e2c1c 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -58,6 +58,11 @@
   UNREACHABLE();
 }
 
+RegionSpace* Space::AsRegionSpace() {
+  LOG(FATAL) << "Unreachable";  // Default for spaces that don't override this virtual.
+  UNREACHABLE();
+}
+
 AllocSpace* Space::AsAllocSpace() {
   UNIMPLEMENTED(FATAL) << "Unreachable";
   UNREACHABLE();
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 860a4c9..d24650b 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -50,6 +50,7 @@
 class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
+class RegionSpace;
 class ZygoteSpace;
 
 static constexpr bool kDebugSpaces = kIsDebugBuild;
@@ -72,6 +73,7 @@
   kSpaceTypeZygoteSpace,
   kSpaceTypeBumpPointerSpace,
   kSpaceTypeLargeObjectSpace,
+  kSpaceTypeRegionSpace,
 };
 std::ostream& operator<<(std::ostream& os, const SpaceType& space_type);
 
@@ -132,6 +134,11 @@
   }
   virtual BumpPointerSpace* AsBumpPointerSpace();
 
+  bool IsRegionSpace() const {  // Type-tag test; AsRegionSpace() gives the typed pointer.
+    return GetType() == kSpaceTypeRegionSpace;
+  }
+  virtual RegionSpace* AsRegionSpace();
+
   // Does this space hold large objects and implement the large object space abstraction?
   bool IsLargeObjectSpace() const {
     return GetType() == kSpaceTypeLargeObjectSpace;
diff --git a/runtime/globals.h b/runtime/globals.h
index 93026da..0756a73 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -92,7 +92,15 @@
 static constexpr bool kUseBrooksReadBarrier = false;
 #endif
 
+#ifdef USE_TABLE_LOOKUP_READ_BARRIER
+static constexpr bool kUseTableLookupReadBarrier = true;
+#else
+static constexpr bool kUseTableLookupReadBarrier = false;
+#endif
+
 static constexpr bool kUseBakerOrBrooksReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier;
+static constexpr bool kUseReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier ||
+    kUseTableLookupReadBarrier;
 
 // If true, references within the heap are poisoned (negated).
 #ifdef ART_HEAP_POISONING
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 0b04276..d2e93bc 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -507,7 +507,7 @@
 
     Env env = { this, output };
     runtime->VisitRoots(RootVisitor, &env);
-    runtime->GetHeap()->VisitObjects(VisitObjectCallback, &env);
+    runtime->GetHeap()->VisitObjectsPaused(VisitObjectCallback, &env);
 
     output->StartNewRecord(HPROF_TAG_HEAP_DUMP_END, kHprofTime);
     output->EndRecord();
@@ -1151,10 +1151,20 @@
 void DumpHeap(const char* filename, int fd, bool direct_to_ddms) {
   CHECK(filename != nullptr);
 
+  Thread* self = Thread::Current();
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  // Sampled once so that the increment and decrement below always pair up.
+  const bool disable_moving_gc = heap->IsGcConcurrentAndMoving();
+  if (disable_moving_gc) {  // No GC during the dump; see Heap::VisitObjects().
+    heap->IncrementDisableMovingGC(self);
+  }
   Runtime::Current()->GetThreadList()->SuspendAll();
   Hprof hprof(filename, fd, direct_to_ddms);
   hprof.Dump();
   Runtime::Current()->GetThreadList()->ResumeAll();
+  if (disable_moving_gc) {
+    heap->DecrementDisableMovingGC(self);
+  }
 }
 
 }  // namespace hprof
diff --git a/runtime/image.cc b/runtime/image.cc
index b83eeb1..269a07d 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -111,7 +111,17 @@
 }
 
 mirror::ObjectArray<mirror::Object>* ImageHeader::GetImageRoots() const {
-  return reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(image_roots_);
+  // Need a read barrier as it's not visited during root scan.
+  // Pass the address of a local copy to the read barrier rather than
+  // the address of image_roots_, because the object won't move
+  // (asserted below) and image_roots_ is a const member here.
+  mirror::ObjectArray<mirror::Object>* image_roots =
+      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(image_roots_);
+  mirror::ObjectArray<mirror::Object>* result =
+      ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Object>, kWithReadBarrier, true>(
+          &image_roots);
+  DCHECK_EQ(image_roots, result);
+  return result;
 }
 
 }  // namespace art
diff --git a/runtime/image.h b/runtime/image.h
index 7e2b847..3c527b8 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -118,7 +118,8 @@
 
   mirror::Object* GetImageRoot(ImageRoot image_root) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ObjectArray<mirror::Object>* GetImageRoots() const;
+  mirror::ObjectArray<mirror::Object>* GetImageRoots() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void RelocateImage(off_t delta);
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e336c38..90115c3 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -671,12 +671,14 @@
     return false;
   }
   // Not found. Add it.
+  static_assert(!kMovingMethods, "Not safe if methods can move");
   int32_t hash_code = method->IdentityHashCode();
   deoptimized_methods_.insert(std::make_pair(hash_code, GcRoot<mirror::ArtMethod>(method)));
   return true;
 }
 
 bool Instrumentation::FindDeoptimizedMethod(mirror::ArtMethod* method) {
+  static_assert(!kMovingMethods, "Not safe if methods can move");
   int32_t hash_code = method->IdentityHashCode();
   auto range = deoptimized_methods_.equal_range(hash_code);
   for (auto it = range.first; it != range.second; ++it) {
@@ -700,6 +702,7 @@
 }
 
 bool Instrumentation::RemoveDeoptimizedMethod(mirror::ArtMethod* method) {
+  static_assert(!kMovingMethods, "Not safe if methods can move");
   int32_t hash_code = method->IdentityHashCode();
   auto range = deoptimized_methods_.equal_range(hash_code);
   for (auto it = range.first; it != range.second; ++it) {
@@ -1008,15 +1011,14 @@
   // back to an upcall.
   NthCallerVisitor visitor(self, 1, true);
   visitor.WalkStack(true);
-  bool deoptimize = (visitor.caller != NULL) &&
+  bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller));
-  if (deoptimize && kVerboseInstrumentation) {
-    LOG(INFO) << "Deoptimizing into " << PrettyMethod(visitor.caller);
-  }
   if (deoptimize) {
     if (kVerboseInstrumentation) {
-      LOG(INFO) << "Deoptimizing from " << PrettyMethod(method)
-                << " result is " << std::hex << return_value.GetJ();
+      LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
+                                PrettyMethod(visitor.caller).c_str(),
+                                PrettyMethod(method).c_str(),
+                                return_value.GetJ()) << *self;
     }
     self->SetDeoptimizationReturnValue(return_value);
     return GetTwoWordSuccessValue(*return_pc,
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index f92f209..19bfc4e 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -192,6 +192,7 @@
     const DexFile::StringId* string_id = dex_file->FindStringId(utf8.c_str());
     if (string_id != nullptr) {
       uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
+      // GetResolvedString() contains a RB.
       mirror::String* image_string = dex_cache->GetResolvedString(string_idx);
       if (image_string != NULL) {
         return image_string;
@@ -214,6 +215,13 @@
   allow_new_interns_ = false;
 }
 
+void InternTable::EnsureNewInternsDisallowed() {
+  // Acquiring the lock once waits out any thread still adding an intern.
+  Thread* const self = Thread::Current();
+  MutexLock mu(self, *Locks::intern_table_lock_);
+  CHECK(!allow_new_interns_);
+}
+
 mirror::String* InternTable::Insert(mirror::String* s, bool is_strong) {
   if (s == nullptr) {
     return nullptr;
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 371d3f7..2e31b7e 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -85,8 +85,9 @@
 
   void DumpForSigQuit(std::ostream& os) const;
 
-  void DisallowNewInterns() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DisallowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnsureNewInternsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Adds all of the resolved image strings from the image space into the intern table. The
   // advantage of doing this is preventing expensive DexFile::FindStringId calls.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index b04a18b..9d988e9 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -493,7 +493,23 @@
   while (shadow_frame != NULL) {
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
-    value = Execute(self, code_item, *shadow_frame, value);
+    const uint32_t dex_pc = shadow_frame->GetDexPC();
+    uint32_t new_dex_pc;
+    if (UNLIKELY(self->IsExceptionPending())) {
+      const instrumentation::Instrumentation* const instrumentation =
+          Runtime::Current()->GetInstrumentation();
+      uint32_t found_dex_pc = FindNextInstructionFollowingException(self, *shadow_frame, dex_pc,
+                                                                    instrumentation);
+      new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
+                                  // or DexFile::kDexNoIndex if there is none.
+    } else {
+      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+      new_dex_pc = dex_pc + instr->SizeInCodeUnits();  // the dex pc of the next instruction.
+    }
+    if (new_dex_pc != DexFile::kDexNoIndex) {
+      shadow_frame->SetDexPC(new_dex_pc);
+      value = Execute(self, code_item, *shadow_frame, value);
+    }
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     delete old_frame;
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 8fcbf90..e4b3247 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -148,7 +148,10 @@
   const void* const* currentHandlersTable;
   bool notified_method_entry_event = false;
   UPDATE_HANDLER_TABLE();
-  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing..
+  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
+    if (kIsDebugBuild) {
+      self->AssertNoPendingException();
+    }
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -236,6 +239,7 @@
 
   HANDLE_INSTRUCTION_START(MOVE_EXCEPTION) {
     Throwable* exception = self->GetException(nullptr);
+    DCHECK(exception != nullptr) << "No pending exception on MOVE_EXCEPTION instruction";
     shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
     self->ClearException();
     ADVANCE(1);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 38665c7..2f85587 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -69,7 +69,10 @@
   uint32_t dex_pc = shadow_frame.GetDexPC();
   bool notified_method_entry_event = false;
   const instrumentation::Instrumentation* const instrumentation = Runtime::Current()->GetInstrumentation();
-  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing..
+  if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
+    if (kIsDebugBuild) {
+        self->AssertNoPendingException();
+    }
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         shadow_frame.GetMethod(), 0);
@@ -161,6 +164,7 @@
       case Instruction::MOVE_EXCEPTION: {
         PREAMBLE();
         Throwable* exception = self->GetException(nullptr);
+        DCHECK(exception != nullptr) << "No pending exception on MOVE_EXCEPTION instruction";
         shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
         self->ClearException();
         inst = inst->Next_1xx();
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 4643d14..40417d8 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -550,6 +550,13 @@
   weak_globals_add_condition_.Broadcast(self);
 }
 
+void JavaVMExt::EnsureNewWeakGlobalsDisallowed() {
+  // Acquiring the lock once waits out any thread still adding a weak global.
+  Thread* const self = Thread::Current();
+  MutexLock mu(self, weak_globals_lock_);
+  CHECK(!allow_new_weak_globals_);
+}
+
 mirror::Object* JavaVMExt::DecodeGlobal(Thread* self, IndirectRef ref) {
   return globals_.SynchronizedGet(self, &globals_lock_, ref);
 }
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 749b9fb..c3f0a82 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -104,9 +104,9 @@
 
   void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DisallowNewWeakGlobals() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-
+  void DisallowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnsureNewWeakGlobalsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   jobject AddGlobalRef(Thread* self, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/jobject_comparator.cc b/runtime/jobject_comparator.cc
index 77f93ff..1f424b3 100644
--- a/runtime/jobject_comparator.cc
+++ b/runtime/jobject_comparator.cc
@@ -25,33 +25,32 @@
 
 bool JobjectComparator::operator()(jobject jobj1, jobject jobj2) const {
   // Ensure null references and cleared jweaks appear at the end.
-  if (jobj1 == NULL) {
-    return true;
-  } else if (jobj2 == NULL) {
+  if (jobj1 == nullptr) {
+    return jobj2 != nullptr;  // Two nulls are equivalent: comp(x, x) must be false.
+  } else if (jobj2 == nullptr) {
     return false;
   }
   ScopedObjectAccess soa(Thread::Current());
-  mirror::Object* obj1 = soa.Decode<mirror::Object*>(jobj1);
-  mirror::Object* obj2 = soa.Decode<mirror::Object*>(jobj2);
-  if (obj1 == NULL) {
-    return true;
-  } else if (obj2 == NULL) {
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Object> obj1(hs.NewHandle(soa.Decode<mirror::Object*>(jobj1)));
+  Handle<mirror::Object> obj2(hs.NewHandle(soa.Decode<mirror::Object*>(jobj2)));
+  if (obj1.Get() == nullptr) {
+    return obj2.Get() != nullptr;  // Same rule for two cleared jweaks.
+  } else if (obj2.Get() == nullptr) {
     return false;
   }
   // Sort by class...
   if (obj1->GetClass() != obj2->GetClass()) {
     return obj1->GetClass()->IdentityHashCode() < obj2->GetClass()->IdentityHashCode();
-  } else {
-    // ...then by size...
-    size_t count1 = obj1->SizeOf();
-    size_t count2 = obj2->SizeOf();
-    if (count1 != count2) {
-      return count1 < count2;
-    } else {
-      // ...and finally by identity hash code.
-      return obj1->IdentityHashCode() < obj2->IdentityHashCode();
-    }
   }
+  // ...then by size...
+  const size_t count1 = obj1->SizeOf();
+  const size_t count2 = obj2->SizeOf();
+  if (count1 != count2) {
+    return count1 < count2;
+  }
+  // ...and finally by identity hash code.
+  return obj1->IdentityHashCode() < obj2->IdentityHashCode();
 }
 
 }  // namespace art
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 3dc9e08..495f753 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -650,7 +650,14 @@
 template <bool kVisitClass, typename Visitor>
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
-  if (!IsTemp() && IsResolved()) {
+  // Right after a class is allocated, but not yet loaded
+  // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
+  // and scan it. IsTemp() may call Class::GetAccessFlags() but may
+  // fail in the DCHECK in Class::GetAccessFlags() because the class
+  // status is kStatusNotReady. To avoid it, rely on IsResolved()
+  // only. This is fine because a temp class never goes into the
+  // kStatusResolved state.
+  if (IsResolved()) {
     // Temp classes don't ever populate imt/vtable or static fields and they are not even
     // allocated with the right size for those. Also, unresolved classes don't have fields
     // linked yet.
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 121947d..d690163 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -154,7 +154,6 @@
     }
   } while (!atomic_rb_ptr->CompareExchangeWeakSequentiallyConsistent(expected_ref.reference_,
                                                                      new_ref.reference_));
-  DCHECK_EQ(new_ref.reference_, atomic_rb_ptr->LoadRelaxed());
   return true;
 #else
   UNUSED(expected_rb_ptr, rb_ptr);
@@ -826,6 +825,17 @@
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset,
                                                              Object* old_value, Object* new_value) {
+  bool success = CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier<
+      kTransactionActive, kCheckTransaction, kVerifyFlags>(field_offset, old_value, new_value);
+  if (success) {
+    Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+  }
+  return success;
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier(
+    MemberOffset field_offset, Object* old_value, Object* new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -848,7 +858,14 @@
 
   bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref.reference_,
                                                                         new_ref.reference_);
+  return success;
+}
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset,
+                                                               Object* old_value, Object* new_value) {
+  bool success = CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<
+      kTransactionActive, kCheckTransaction, kVerifyFlags>(field_offset, old_value, new_value);
   if (success) {
     Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
   }
@@ -856,8 +873,8 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset,
-                                                             Object* old_value, Object* new_value) {
+inline bool Object::CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier(
+    MemberOffset field_offset, Object* old_value, Object* new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -880,10 +897,6 @@
 
   bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref.reference_,
                                                                           new_ref.reference_);
-
-  if (success) {
-    Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
-  }
   return success;
 }
 
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 6914f38..9262a3e 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -75,7 +75,7 @@
   uint8_t* dst_bytes = reinterpret_cast<uint8_t*>(dest);
   size_t offset = sizeof(Object);
   memcpy(dst_bytes + offset, src_bytes + offset, num_bytes - offset);
-  if (kUseBakerOrBrooksReadBarrier) {
+  if (kUseReadBarrier) {
     // We need a RB here. After the memcpy that covers the whole
     // object above, copy references fields one by one again with a
     // RB. TODO: Optimize this later?
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 07d15b5..780c5ae 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -240,12 +240,24 @@
   bool CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
                                                 Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                                   Object* old_value,
+                                                                   Object* new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
                                                   Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                                     Object* old_value,
+                                                                     Object* new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset);
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index fbc4f4a..96d426b 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -131,7 +131,7 @@
   CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
   IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
   IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
-  if (kUseBakerOrBrooksReadBarrier) {
+  if (kUseReadBarrier) {
     // TODO: Optimize this later?
     const bool copy_forward = (src != this) || (dst_pos < src_pos) || (dst_pos - src_pos >= count);
     if (copy_forward) {
@@ -174,7 +174,7 @@
   CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
   IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
   IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
-  if (kUseBakerOrBrooksReadBarrier) {
+  if (kUseReadBarrier) {
     // TODO: Optimize this later?
     for (int i = 0; i < count; ++i) {
       // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 5e33380..5ed8c7d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1106,6 +1106,13 @@
   monitor_add_condition_.Broadcast(self);
 }
 
+void MonitorList::EnsureNewMonitorsDisallowed() {
+  // Acquiring the lock once waits out any thread still adding a monitor.
+  Thread* const self = Thread::Current();
+  MutexLock mu(self, monitor_list_lock_);
+  CHECK(!allow_new_monitors_);
+}
+
 void MonitorList::Add(Monitor* m) {
   Thread* self = Thread::Current();
   MutexLock mu(self, monitor_list_lock_);
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 8f97a40..0c5f8a4 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -266,12 +266,13 @@
   MonitorList();
   ~MonitorList();
 
-  void Add(Monitor* m);
+  void Add(Monitor* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SweepMonitorList(IsMarkedCallback* callback, void* arg)
       LOCKS_EXCLUDED(monitor_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DisallowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
   void AllowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
+  void EnsureNewMonitorsDisallowed() LOCKS_EXCLUDED(monitor_list_lock_);
   // Returns how many monitors were deflated.
   size_t DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index efc4a71..4551d5b 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -105,8 +105,15 @@
     verify_(true),
     image_isa_(kRuntimeISA),
     use_homogeneous_space_compaction_for_oom_(true),  // Enable hspace compaction on OOM by default.
-    min_interval_homogeneous_space_compaction_by_oom_(MsToNs(100 * 1000))  // 100s.
-    {}
+    min_interval_homogeneous_space_compaction_by_oom_(MsToNs(100 * 1000)) {  // 100s.
+  if (kUseReadBarrier) {
+    // If RB is enabled (currently a build-time decision), use CC as the default GC.
+    collector_type_ = gc::kCollectorTypeCC;
+    background_collector_type_ = gc::kCollectorTypeCC;  // Disable background compaction for CC.
+    interpreter_only_ = true;  // Disable the compiler for CC (for now).
+    // use_tlab_ = true;
+  }
+}
 
 ParsedOptions* ParsedOptions::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
   std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 3517848..34f6713 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -204,9 +204,7 @@
     CHECK(code_item != nullptr);
     uint16_t num_regs = code_item->registers_size_;
     uint32_t dex_pc = GetDexPc();
-    const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
-    uint32_t new_dex_pc = dex_pc + inst->SizeInCodeUnits();
-    ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, m, new_dex_pc);
+    ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, m, dex_pc);
     StackHandleScope<3> hs(self_);
     mirror::Class* declaring_class = m->GetDeclaringClass();
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 0dc31e7..c74fded 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -19,43 +19,147 @@
 
 #include "read_barrier.h"
 
+#include "gc/collector/concurrent_copying.h"
+#include "gc/heap.h"
 #include "mirror/object_reference.h"
+#include "mirror/reference.h"
+#include "runtime.h"
 
 namespace art {
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
-  // Unused for now.
-  UNUSED(obj, offset, ref_addr);
   const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (with_read_barrier && kUseBakerReadBarrier) {
-    // To be implemented.
-    return ref_addr->AsMirrorPtr();
+    // The higher bits of the rb ptr, rb_ptr_high_bits (must be zero),
+    // are used to create an artificial data dependency from the is_gray
+    // load to the ref field (ptr) load to avoid needing a load-load
+    // barrier between the two.
+    uintptr_t rb_ptr_high_bits;
+    bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
+    ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
+        rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
+    MirrorType* ref = ref_addr->AsMirrorPtr();
+    if (is_gray) {
+      // Slow-path.
+      ref = reinterpret_cast<MirrorType*>(Mark(ref));
+    }
+    if (kEnableReadBarrierInvariantChecks) {
+      CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
+    }
+    AssertToSpaceInvariant(obj, offset, ref);
+    return ref;
   } else if (with_read_barrier && kUseBrooksReadBarrier) {
     // To be implemented.
     return ref_addr->AsMirrorPtr();
+  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
+    MirrorType* ref = ref_addr->AsMirrorPtr();
+    MirrorType* old_ref = ref;
+    // The heap or the collector can be null at startup. TODO: avoid the need for this null check.
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
+      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+      // Update the field atomically. This may fail if a mutator updates it first, but that's OK.
+      obj->CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<false, false>(
+          offset, old_ref, ref);
+    }
+    AssertToSpaceInvariant(obj, offset, ref);
+    return ref;
   } else {
     // No read barrier.
     return ref_addr->AsMirrorPtr();
   }
 }
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
 inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root) {
   MirrorType* ref = *root;
   const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (with_read_barrier && kUseBakerReadBarrier) {
-    // To be implemented.
+    if (kMaybeDuringStartup && IsDuringStartup()) {
+      // During startup, the heap may not be initialized yet. Just
+      // return the given ref.
+      return ref;
+    }
+    // TODO: separate the read barrier code from the collector code more.
+    if (Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking()) {
+      ref = reinterpret_cast<MirrorType*>(Mark(ref));
+    }
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     return ref;
   } else if (with_read_barrier && kUseBrooksReadBarrier) {
     // To be implemented.
     return ref;
+  } else if (with_read_barrier && kUseTableLookupReadBarrier) {
+    if (kMaybeDuringStartup && IsDuringStartup()) {
+      // During startup, the heap may not be initialized yet. Just
+      // return the given ref.
+      return ref;
+    }
+    if (Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+      MirrorType* old_ref = ref;
+      ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
+      // Update the root atomically. This may fail if a mutator updates it first, but that's OK.
+      Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+      atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref);
+    }
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+    return ref;
   } else {
     return ref;
   }
 }
 
+// Reports whether the read barrier should be treated as running
+// "during startup": the heap does not exist yet, the current
+// collector type is not CC, or the concurrent copying collector has
+// not been created. Callers in this file use it to skip collector
+// work in those states.
+inline bool ReadBarrier::IsDuringStartup() {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  if (heap == nullptr) {
+    return true;  // During startup, the heap can be null.
+  }
+  const bool cc_in_use = heap->CurrentCollectorType() == gc::kCollectorTypeCC;
+  if (!cc_in_use) {
+    return true;  // CC isn't running.
+  }
+  // During startup, the collector can be null as well.
+  return heap->ConcurrentCopyingCollector() == nullptr;
+}
+
+inline void ReadBarrier::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
+                                                mirror::Object* ref) {
+  const bool checks_enabled = kEnableToSpaceInvariantChecks || kIsDebugBuild;
+  if (!checks_enabled || ref == nullptr || IsDuringStartup()) {
+    return;  // Nothing to verify.
+  }
+  gc::collector::ConcurrentCopying* const collector =
+      Runtime::Current()->GetHeap()->ConcurrentCopyingCollector();
+  collector->AssertToSpaceInvariant(obj, offset, ref);
+}
+
+inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) {  // Delegates to the CC collector.
+  return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj);
+}
+
+inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
+                                                   uintptr_t* out_rb_ptr_high_bits) {
+  mirror::Object* const rb_ptr = obj->GetReadBarrierPointer();
+  const uintptr_t raw_bits = reinterpret_cast<uintptr_t>(rb_ptr);
+  // Split the word into the bits under rb_ptr_mask_ and whatever lies above them.
+  const uintptr_t rb_ptr_low_bits = raw_bits & rb_ptr_mask_;
+  if (kEnableReadBarrierInvariantChecks) {
+    CHECK(rb_ptr_low_bits == white_ptr_ || rb_ptr_low_bits == gray_ptr_ ||
+          rb_ptr_low_bits == black_ptr_)
+        << "obj=" << obj << " rb_ptr=" << rb_ptr << " " << PrettyTypeOf(obj);
+  }
+  // The high bits are expected to be zero; verifying that is left to the caller.
+  *out_rb_ptr_high_bits = raw_bits & ~rb_ptr_mask_;
+  return rb_ptr_low_bits == gray_ptr_;
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_READ_BARRIER_INL_H_
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index ed5db4e..474b46f 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -19,6 +19,7 @@
 
 #include "base/mutex.h"
 #include "base/macros.h"
+#include "jni.h"
 #include "offsets.h"
 #include "read_barrier_c.h"
 
@@ -26,25 +27,70 @@
 // which needs to be a C header file for asm_support.h.
 
 namespace art {
+
 namespace mirror {
+  class ArtField;
+  class ArtMethod;
   class Object;
   template<typename MirrorType> class HeapReference;
 }  // namespace mirror
 
 class ReadBarrier {
  public:
+  // TODO: disable these flags for production use.
+  // Enable the to-space invariant checks.
+  static constexpr bool kEnableToSpaceInvariantChecks = true;
+  // Enable the read barrier checks.
+  static constexpr bool kEnableReadBarrierInvariantChecks = true;
+
   // It's up to the implementation whether the given field gets
   // updated whereas the return value must be an updated reference.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            bool kMaybeDuringStartup = false>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // It's up to the implementation whether the given root gets updated
   // whereas the return value must be an updated reference.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
+            bool kMaybeDuringStartup = false>
   ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static bool IsDuringStartup();
+
+  // Without the holder object.
+  static void AssertToSpaceInvariant(mirror::Object* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+  }
+  // With the holder object.
+  static void AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
+                                     mirror::Object* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static mirror::Object* Mark(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static mirror::Object* WhitePtr() {
+    return reinterpret_cast<mirror::Object*>(white_ptr_);
+  }
+  static mirror::Object* GrayPtr() {
+    return reinterpret_cast<mirror::Object*>(gray_ptr_);
+  }
+  static mirror::Object* BlackPtr() {
+    return reinterpret_cast<mirror::Object*>(black_ptr_);
+  }
+
+  ALWAYS_INLINE static bool HasGrayReadBarrierPointer(mirror::Object* obj,
+                                                      uintptr_t* out_rb_ptr_high_bits)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
+  static constexpr uintptr_t white_ptr_ = 0x0;    // Not marked.
+  static constexpr uintptr_t gray_ptr_ = 0x1;     // Marked, but not marked through. On mark stack.
+  static constexpr uintptr_t black_ptr_ = 0x2;    // Marked through. Used for non-moving objects.
+  static constexpr uintptr_t rb_ptr_mask_ = 0x3;  // The low 2 bits for white|gray|black.
 };
 
 }  // namespace art
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index 1385c60..49efaa2 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -22,10 +22,14 @@
 // include globals.h.
 
 // Uncomment one of the following two and the two fields in
-// Object.java (libcore) to enable baker or brooks pointers.
+// Object.java (libcore) to enable baker, brooks (unimplemented), or
+// table-lookup read barriers.
 
+#ifdef ART_USE_READ_BARRIER
 // #define USE_BAKER_READ_BARRIER
 // #define USE_BROOKS_READ_BARRIER
+#define USE_TABLE_LOOKUP_READ_BARRIER
+#endif
 
 #if defined(USE_BAKER_READ_BARRIER) || defined(USE_BROOKS_READ_BARRIER)
 #define USE_BAKER_OR_BROOKS_READ_BARRIER
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index e454b20..357d454 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -71,33 +71,6 @@
   return obj->AsArray()->GetLength();
 }
 
-struct ObjectComparator {
-  bool operator()(GcRoot<mirror::Object> root1, GcRoot<mirror::Object> root2) const
-    // TODO: enable analysis when analysis can work with the STL.
-      NO_THREAD_SAFETY_ANALYSIS {
-    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-    mirror::Object* obj1 = root1.Read<kWithoutReadBarrier>();
-    mirror::Object* obj2 = root2.Read<kWithoutReadBarrier>();
-    DCHECK(obj1 != nullptr);
-    DCHECK(obj2 != nullptr);
-    Runtime* runtime = Runtime::Current();
-    DCHECK(!runtime->IsClearedJniWeakGlobal(obj1));
-    DCHECK(!runtime->IsClearedJniWeakGlobal(obj2));
-    // Sort by class...
-    if (obj1->GetClass() != obj2->GetClass()) {
-      return obj1->GetClass()->IdentityHashCode() < obj2->GetClass()->IdentityHashCode();
-    }
-    // ...then by size...
-    const size_t size1 = obj1->SizeOf();
-    const size_t size2 = obj2->SizeOf();
-    if (size1 != size2) {
-      return size1 < size2;
-    }
-    // ...and finally by identity hash code.
-    return obj1->IdentityHashCode() < obj2->IdentityHashCode();
-  }
-};
-
 // Log an object with some additional info.
 //
 // Pass in the number of elements in the array (or 0 if this is not an
@@ -143,6 +116,38 @@
 }
 
 void ReferenceTable::Dump(std::ostream& os, Table& entries) {
+  // Compare GC roots, first by class, then size, then address.
+  struct GcRootComparator {
+    bool operator()(GcRoot<mirror::Object> root1, GcRoot<mirror::Object> root2) const
+      // TODO: enable analysis when analysis can work with the STL.
+        NO_THREAD_SAFETY_ANALYSIS {
+      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+      // These GC roots are already forwarded in ReferenceTable::Dump. We sort by class since there
+      // are no suspend points which can happen during the sorting process. This works since
+      // we are guaranteed that the addresses of obj1, obj2, obj1->GetClass, obj2->GetClass won't
+      // change during the sorting process. The classes are forwarded by ref->GetClass().
+      mirror::Object* obj1 = root1.Read<kWithoutReadBarrier>();
+      mirror::Object* obj2 = root2.Read<kWithoutReadBarrier>();
+      DCHECK(obj1 != nullptr);
+      DCHECK(obj2 != nullptr);
+      Runtime* runtime = Runtime::Current();
+      DCHECK(!runtime->IsClearedJniWeakGlobal(obj1));
+      DCHECK(!runtime->IsClearedJniWeakGlobal(obj2));
+      // Sort by class...
+      if (obj1->GetClass() != obj2->GetClass()) {
+        return obj1->GetClass() < obj2->GetClass();
+      }
+      // ...then by size...
+      const size_t size1 = obj1->SizeOf();
+      const size_t size2 = obj2->SizeOf();
+      if (size1 != size2) {
+        return size1 < size2;
+      }
+      // ...and finally by address.
+      return obj1 < obj2;
+    }
+  };
+
   if (entries.empty()) {
     os << "  (empty)\n";
     return;
@@ -201,7 +206,7 @@
   if (sorted_entries.empty()) {
     return;
   }
-  std::sort(sorted_entries.begin(), sorted_entries.end(), ObjectComparator());
+  std::sort(sorted_entries.begin(), sorted_entries.end(), GcRootComparator());
 
   // Dump a summary of the whole table.
   os << "  Summary:\n";
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index cdf8d54..a82bc85 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -19,6 +19,7 @@
 
 #include "runtime.h"
 
+#include "mirror/art_method.h"
 #include "read_barrier-inl.h"
 
 namespace art {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 9dddf2f..57a849a 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1219,6 +1219,12 @@
   }
 }
 
+void Runtime::VisitTransactionRoots(RootCallback* callback, void* arg) {
+  if (preinitialization_transaction_ != nullptr) {
+    preinitialization_transaction_->VisitRoots(callback, arg);
+  }
+}
+
 void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) {
   java_vm_->VisitRoots(callback, arg);
   sentinel_.VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
@@ -1238,9 +1244,7 @@
       verifier->VisitRoots(callback, arg);
     }
   }
-  if (preinitialization_transaction_ != nullptr) {
-    preinitialization_transaction_->VisitRoots(callback, arg);
-  }
+  VisitTransactionRoots(callback, arg);
   instrumentation_.VisitRoots(callback, arg);
 }
 
@@ -1249,6 +1253,15 @@
   VisitNonThreadRoots(callback, arg);
 }
 
+void Runtime::VisitThreadRoots(RootCallback* callback, void* arg) {
+  thread_list_->VisitRoots(callback, arg);
+}
+
+size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+                                gc::collector::GarbageCollector* collector) {
+  return thread_list_->FlipThreadRoots(thread_flip_visitor, flip_callback, collector);
+}
+
 void Runtime::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
   VisitNonConcurrentRoots(callback, arg);
   VisitConcurrentRoots(callback, arg, flags);
@@ -1328,6 +1341,14 @@
   java_vm_->AllowNewWeakGlobals();
 }
 
+void Runtime::EnsureNewSystemWeaksDisallowed() {
+  // Lock and unlock the system weak locks once to ensure that no
+  // threads are still in the middle of adding new system weaks.
+  monitor_list_->EnsureNewMonitorsDisallowed();
+  intern_table_->EnsureNewInternsDisallowed();
+  java_vm_->EnsureNewWeakGlobalsDisallowed();
+}
+
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
   instruction_set_ = instruction_set;
   if ((instruction_set_ == kThumb2) || (instruction_set_ == kArm)) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d58fe3c..c5a8739 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -43,6 +43,9 @@
 
 namespace gc {
   class Heap;
+  namespace collector {
+    class GarbageCollector;
+  }  // namespace collector
 }  // namespace gc
 namespace mirror {
   class ArtMethod;
@@ -58,6 +61,7 @@
   class MethodVerifier;
 }  // namespace verifier
 class ClassLinker;
+class Closure;
 class DexFile;
 class InternTable;
 class JavaVMExt;
@@ -270,8 +274,9 @@
     return "2.1.0";
   }
 
-  void DisallowNewSystemWeaks() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DisallowNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnsureNewSystemWeaksDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
@@ -287,6 +292,17 @@
   void VisitNonThreadRoots(RootCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void VisitTransactionRoots(RootCallback* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Visit all of the thread roots.
+  void VisitThreadRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Flip thread roots from from-space refs to to-space refs.
+  size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+                         gc::collector::GarbageCollector* collector)
+      LOCKS_EXCLUDED(Locks::mutator_lock_);
+
   // Visit all other roots which must be done with mutators suspended.
   void VisitNonConcurrentRoots(RootCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index ae3eaf2..adf3480 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -20,6 +20,7 @@
 #include "base/casts.h"
 #include "java_vm_ext.h"
 #include "jni_env_ext-inl.h"
+#include "mirror/art_field.h"
 #include "read_barrier.h"
 #include "thread-inl.h"
 #include "verify_object.h"
diff --git a/runtime/stack.h b/runtime/stack.h
index 233e1c3..b2b2072 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -24,6 +24,7 @@
 #include "dex_file.h"
 #include "gc_root.h"
 #include "mirror/object_reference.h"
+#include "read_barrier.h"
 #include "throw_location.h"
 #include "utils.h"
 #include "verify_object.h"
@@ -163,6 +164,9 @@
       const uint32_t* vreg_ptr = &vregs_[i];
       ref = reinterpret_cast<const StackReference<mirror::Object>*>(vreg_ptr)->AsMirrorPtr();
     }
+    if (kUseReadBarrier) {
+      ReadBarrier::AssertToSpaceInvariant(ref);
+    }
     if (kVerifyFlags & kVerifyReads) {
       VerifyObject(ref);
     }
@@ -230,6 +234,9 @@
     if (kVerifyFlags & kVerifyWrites) {
       VerifyObject(val);
     }
+    if (kUseReadBarrier) {
+      ReadBarrier::AssertToSpaceInvariant(val);
+    }
     uint32_t* vreg = &vregs_[i];
     reinterpret_cast<StackReference<mirror::Object>*>(vreg)->Assign(val);
     if (HasReferenceArray()) {
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 7aed8b0..a85d608 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -178,6 +178,11 @@
       // Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
       Locks::mutator_lock_->SharedUnlock(this);
     } else {
+      // Run the flip function, if set.
+      Closure* flip_func = GetFlipFunction();
+      if (flip_func != nullptr) {
+        flip_func->Run(this);
+      }
       return static_cast<ThreadState>(old_state);
     }
   } while (true);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5690d51..17dfd8c 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -33,6 +33,7 @@
 
 #include "arch/context.h"
 #include "base/mutex.h"
+#include "base/timing_logger.h"
 #include "base/to_str.h"
 #include "class_linker-inl.h"
 #include "class_linker.h"
@@ -723,13 +724,34 @@
   return success;
 }
 
+Closure* Thread::GetFlipFunction() {
+  Atomic<Closure*>* atomic_func = reinterpret_cast<Atomic<Closure*>*>(&tlsPtr_.flip_function);
+  Closure* func;
+  do {
+    func = atomic_func->LoadRelaxed();
+    if (func == nullptr) {
+      return nullptr;
+    }
+  } while (!atomic_func->CompareExchangeWeakSequentiallyConsistent(func, nullptr));
+  DCHECK(func != nullptr);
+  return func;
+}
+
+void Thread::SetFlipFunction(Closure* function) {
+  CHECK(function != nullptr);
+  Atomic<Closure*>* atomic_func = reinterpret_cast<Atomic<Closure*>*>(&tlsPtr_.flip_function);
+  atomic_func->StoreSequentiallyConsistent(function);
+}
+
 void Thread::FullSuspendCheck() {
   VLOG(threads) << this << " self-suspending";
   ATRACE_BEGIN("Full suspend check");
   // Make thread appear suspended to other threads, release mutator_lock_.
+  tls32_.suspended_at_suspend_check = true;
   TransitionFromRunnableToSuspended(kSuspended);
   // Transition back to runnable noting requests to suspend, re-acquire share on mutator_lock_.
   TransitionFromSuspendedToRunnable();
+  tls32_.suspended_at_suspend_check = false;
   ATRACE_END();
   VLOG(threads) << this << " self-reviving";
 }
@@ -740,6 +762,20 @@
   bool is_daemon = false;
   Thread* self = Thread::Current();
 
+  // If flip_function is not null, it means we have run a checkpoint
+  // before the thread wakes up to execute the flip function and the
+  // thread roots haven't been forwarded.  So the following access to
+  // the roots (opeer or methods in the frames) would be bad. Run it
+  // here. TODO: clean up.
+  if (thread != nullptr) {
+    ScopedObjectAccessUnchecked soa(self);
+    Thread* this_thread = const_cast<Thread*>(thread);
+    Closure* flip_func = this_thread->GetFlipFunction();
+    if (flip_func != nullptr) {
+      flip_func->Run(this_thread);
+    }
+  }
+
   // Don't do this if we are aborting since the GC may have all the threads suspended. This will
   // cause ScopedObjectAccessUnchecked to deadlock.
   if (gAborting == 0 && self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
@@ -980,6 +1016,19 @@
 }
 
 void Thread::DumpJavaStack(std::ostream& os) const {
+  // If flip_function is not null, it means we have run a checkpoint
+  // before the thread wakes up to execute the flip function and the
+  // thread roots haven't been forwarded.  So the following access to
+  // the roots (locks or methods in the frames) would be bad. Run it
+  // here. TODO: clean up.
+  {
+    Thread* this_thread = const_cast<Thread*>(this);
+    Closure* flip_func = this_thread->GetFlipFunction();
+    if (flip_func != nullptr) {
+      flip_func->Run(this_thread);
+    }
+  }
+
   // Dumping the Java stack involves the verifier for locks. The verifier operates under the
   // assumption that there is no exception pending on entry. Thus, stash any pending exception.
   // Thread::Current() instead of this in case a thread is dumping the stack of another suspended
@@ -1115,6 +1164,8 @@
   for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
     tlsPtr_.checkpoint_functions[i] = nullptr;
   }
+  tlsPtr_.flip_function = nullptr;
+  tls32_.suspended_at_suspend_check = false;
 }
 
 bool Thread::IsStillStarting() const {
@@ -1233,6 +1284,8 @@
   CHECK(tlsPtr_.checkpoint_functions[0] == nullptr);
   CHECK(tlsPtr_.checkpoint_functions[1] == nullptr);
   CHECK(tlsPtr_.checkpoint_functions[2] == nullptr);
+  CHECK(tlsPtr_.flip_function == nullptr);
+  CHECK_EQ(tls32_.suspended_at_suspend_check, false);
 
   // We may be deleting a still born thread.
   SetStateUnsafe(kTerminated);
diff --git a/runtime/thread.h b/runtime/thread.h
index c3a9751..7db9ba5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -216,6 +216,9 @@
   bool RequestCheckpoint(Closure* function)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
 
+  void SetFlipFunction(Closure* function);
+  Closure* GetFlipFunction();
+
   // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
   // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
   void FullSuspendCheck()
@@ -781,6 +784,12 @@
   mirror::Object* AllocTlab(size_t bytes);
   void SetTlab(uint8_t* start, uint8_t* end);
   bool HasTlab() const;
+  uint8_t* GetTlabStart() {
+    return tlsPtr_.thread_local_start;
+  }
+  uint8_t* GetTlabPos() {
+    return tlsPtr_.thread_local_pos;
+  }
 
   // Remove the suspend trigger for this thread by making the suspend_trigger_ TLS value
   // equal to a valid pointer.
@@ -848,6 +857,10 @@
     return tlsPtr_.nested_signal_state;
   }
 
+  bool IsSuspendedAtSuspendCheck() const {
+    return tls32_.suspended_at_suspend_check;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
@@ -953,7 +966,7 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), is_exception_reported_to_instrumentation_(false),
-      handling_signal_(false), padding_(0) {
+      handling_signal_(false), suspended_at_suspend_check(false) {
     }
 
     union StateAndFlags state_and_flags;
@@ -997,8 +1010,10 @@
     // True if signal is being handled by this thread.
     bool32_t handling_signal_;
 
-    // Padding to make the size aligned to 8.  Remove this if we add another 32 bit field.
-    int32_t padding_;
+    // True if the thread is suspended in FullSuspendCheck(). This is
+    // used to distinguish runnable threads that are suspended due to
+    // a normal suspend check from other threads.
+    bool32_t suspended_at_suspend_check;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
@@ -1025,7 +1040,7 @@
       pthread_self(0), last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
       thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
-      nested_signal_state(nullptr) {
+      nested_signal_state(nullptr), flip_function(nullptr) {
         for (size_t i = 0; i < kLockLevelCount; ++i) {
           held_mutexes[i] = nullptr;
         }
@@ -1142,6 +1157,9 @@
 
     // Recorded thread state for nested signals.
     jmp_buf* nested_signal_state;
+
+    // The function used for thread flip.
+    Closure* flip_function;
   } tlsPtr_;
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 6ec40d4..58e5b9d 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -356,6 +356,95 @@
   return count;
 }
 
+// A checkpoint/suspend-all hybrid to switch thread roots from
+// from-space to to-space refs. Used to synchronize threads at a point
+// to mark the initiation of marking while maintaining the to-space
+// invariant.
+size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+                                   gc::collector::GarbageCollector* collector) {
+  TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
+  const uint64_t start_time = NanoTime();
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  Locks::thread_list_lock_->AssertNotHeld(self);
+  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+  CHECK_NE(self->GetState(), kRunnable);
+
+  std::vector<Thread*> runnable_threads;
+  std::vector<Thread*> other_threads;
+
+  // Suspend all threads once.
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    // Update global suspend all state for attaching threads.
+    ++suspend_all_count_;
+    // Increment everybody's suspend count (except our own).
+    for (const auto& thread : list_) {
+      if (thread == self) {
+        continue;
+      }
+      thread->ModifySuspendCount(self, +1, false);
+    }
+  }
+
+  // Run the flip callback for the collector.
+  Locks::mutator_lock_->ExclusiveLock(self);
+  flip_callback->Run(self);
+  Locks::mutator_lock_->ExclusiveUnlock(self);
+  collector->RegisterPause(NanoTime() - start_time);
+
+  // Resume runnable threads.
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    --suspend_all_count_;
+    for (const auto& thread : list_) {
+      if (thread == self) {
+        continue;
+      }
+      // Set the flip function for both runnable and suspended threads
+      // because Thread::DumpState/DumpJavaStack() (invoked by a
+      // checkpoint) may cause the flip function to be run for a
+      // runnable/suspended thread before a runnable thread runs it
+      // for itself or we run it for a suspended thread below.
+      thread->SetFlipFunction(thread_flip_visitor);
+      if (thread->IsSuspendedAtSuspendCheck()) {
+        // The thread will resume right after the broadcast.
+        thread->ModifySuspendCount(self, -1, false);
+        runnable_threads.push_back(thread);
+      } else {
+        other_threads.push_back(thread);
+      }
+    }
+    Thread::resume_cond_->Broadcast(self);
+  }
+
+  // Run the closure on the other threads and let them resume.
+  {
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    for (const auto& thread : other_threads) {
+      Closure* flip_func = thread->GetFlipFunction();
+      if (flip_func != nullptr) {
+        flip_func->Run(thread);
+      }
+    }
+    // Run it for self.
+    thread_flip_visitor->Run(self);
+  }
+
+  // Resume other threads.
+  {
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (const auto& thread : other_threads) {
+      thread->ModifySuspendCount(self, -1, false);
+    }
+    Thread::resume_cond_->Broadcast(self);
+  }
+
+  return runnable_threads.size() + other_threads.size() + 1;  // +1 for self.
+}
+
 void ThreadList::SuspendAll() {
   Thread* self = Thread::Current();
 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 6751bf5..d18315a 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -27,6 +27,11 @@
 #include <list>
 
 namespace art {
+namespace gc {
+  namespace collector {
+    class GarbageCollector;
+  }  // namespace collector
+}  // namespace gc
 class Closure;
 class Thread;
 class TimingLogger;
@@ -95,6 +100,14 @@
   LOCKS_EXCLUDED(Locks::thread_list_lock_,
                  Locks::thread_suspend_count_lock_);
 
+  // Flip thread roots from from-space refs to to-space refs. Used by
+  // the concurrent copying collector.
+  size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+                         gc::collector::GarbageCollector* collector)
+      LOCKS_EXCLUDED(Locks::mutator_lock_,
+                     Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
   // Suspends all threads
   void SuspendAllForDebugger()
       LOCKS_EXCLUDED(Locks::mutator_lock_,
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 88944d7..474a066d 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1643,6 +1643,12 @@
       break;
 
     case Instruction::MOVE_EXCEPTION: {
+      // We do not allow MOVE_EXCEPTION as the first instruction in a method. This is a simple case
+      // where one entrypoint to the catch block is not actually an exception path.
+      if (work_insn_idx_ == 0) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "move-exception at pc 0x0";
+        break;
+      }
       /*
        * This statement can only appear as the first instruction in an exception handler. We verify
        * that as part of extracting the exception type from the catch block list.
diff --git a/test/132-daemon-locks-shutdown/expected.txt b/test/132-daemon-locks-shutdown/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/132-daemon-locks-shutdown/expected.txt
diff --git a/test/132-daemon-locks-shutdown/info.txt b/test/132-daemon-locks-shutdown/info.txt
new file mode 100644
index 0000000..f804064
--- /dev/null
+++ b/test/132-daemon-locks-shutdown/info.txt
@@ -0,0 +1 @@
+Tests that we can shut down the runtime with daemons still looping over locks.
diff --git a/test/132-daemon-locks-shutdown/src/Main.java b/test/132-daemon-locks-shutdown/src/Main.java
new file mode 100644
index 0000000..b5bbc8c
--- /dev/null
+++ b/test/132-daemon-locks-shutdown/src/Main.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test that daemon threads still contending for a lock don't make the runtime abort on shutdown.
+ */
+public class Main {
+
+    public final static int THREAD_COUNT = 32;
+
+    public static void main(String[] args) throws Exception {
+        Object sync = new Object();
+
+        for (int i = 0; i < THREAD_COUNT; i++) {
+            Thread t = new Thread(new Wait(sync));
+            t.setDaemon(true);
+            t.start();
+        }
+    }
+
+    private static class Wait implements Runnable {
+        private Object obj;
+
+        public Wait(Object obj) {
+            this.obj = obj;
+        }
+
+        public void run() {
+            for (;;) {
+                synchronized(obj) {
+                    try {
+                        obj.wait(1);
+                    } catch (Exception exc) {
+                        exc.printStackTrace(System.out);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 6cb08f4..019dc14 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -13,4 +13,5 @@
 b/18800943 (1)
 b/18800943 (2)
 MoveExc
+MoveExceptionOnEntry
 Done!
diff --git a/test/800-smali/smali/move_exception_on_entry.smali b/test/800-smali/smali/move_exception_on_entry.smali
new file mode 100644
index 0000000..e7da2e3
--- /dev/null
+++ b/test/800-smali/smali/move_exception_on_entry.smali
@@ -0,0 +1,30 @@
+.class public LMoveExceptionOnEntry;
+
+.super Ljava/lang/Object;
+
+# Test that we cannot have a catch-handler with move-exception at the beginning of a method.
+
+.method public static moveExceptionOnEntry(I)I
+.registers 4
+:Label1
+       move-exception v2
+       const v1, 100
+       move v0, p0
+       add-int/lit8 p0, p0, 1
+
+:Label2
+       invoke-static {v0}, LMoveExceptionOnEntry;->foo(I)V
+
+:Label3
+       return v1
+
+.catchall {:Label2 .. :Label3} :Label1
+.end method
+
+.method public static foo(I)I
+.registers 4
+:Label1
+       return-void
+
+.end method
+
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 2eda850..b23896d 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -50,25 +50,33 @@
         // Create the test cases.
         testCases = new LinkedList<TestCase>();
         testCases.add(new TestCase("PackedSwitch", "PackedSwitch", "packedSwitch",
-          new Object[]{123}, null, 123));
+                new Object[]{123}, null, 123));
 
         testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100));
-        testCases.add(new TestCase("b/17978759", "B17978759", "test", null, new VerifyError(), null));
+        testCases.add(new TestCase("b/17978759", "B17978759", "test", null, new VerifyError(),
+                null));
         testCases.add(new TestCase("FloatBadArgReg", "FloatBadArgReg", "getInt",
-            new Object[]{100}, null, 100));
+                new Object[]{100}, null, 100));
         testCases.add(new TestCase("negLong", "negLong", "negLong", null, null, 122142L));
         testCases.add(new TestCase("sameFieldNames", "sameFieldNames", "getInt", null, null, 7));
         testCases.add(new TestCase("b/18380491", "B18380491ConcreteClass", "foo",
-            new Object[]{42}, null, 42));
+                new Object[]{42}, null, 42));
         testCases.add(new TestCase("invoke-super abstract", "B18380491ConcreteClass", "foo",
-            new Object[]{0}, new AbstractMethodError(), null));
-        testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2));
+                new Object[]{0}, new AbstractMethodError(), null));
+        testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null,
+                null, 2));
         testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0));
-        testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null, null, 2));
+        testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null,
+                null, 2));
         testCases.add(new TestCase("b/18718277", "B18718277", "getInt", null, null, 0));
-        testCases.add(new TestCase("b/18800943 (1)", "B18800943_1", "n_a", null, new VerifyError(), 0));
-        testCases.add(new TestCase("b/18800943 (2)", "B18800943_2", "n_a", null, new VerifyError(), 0));
-        testCases.add(new TestCase("MoveExc", "MoveExc", "run", null, new ArithmeticException(), null));
+        testCases.add(new TestCase("b/18800943 (1)", "B18800943_1", "n_a", null, new VerifyError(),
+                0));
+        testCases.add(new TestCase("b/18800943 (2)", "B18800943_2", "n_a", null, new VerifyError(),
+                0));
+        testCases.add(new TestCase("MoveExc", "MoveExc", "run", null, new ArithmeticException(),
+                null));
+        testCases.add(new TestCase("MoveExceptionOnEntry", "MoveExceptionOnEntry",
+            "moveExceptionOnEntry", new Object[]{0}, new VerifyError(), null));
     }
 
     public void runTests() {
diff --git a/test/802-deoptimization/expected.txt b/test/802-deoptimization/expected.txt
new file mode 100644
index 0000000..d5f1f08
--- /dev/null
+++ b/test/802-deoptimization/expected.txt
@@ -0,0 +1 @@
+CatchHandlerOnEntryWithoutMoveException OK
diff --git a/test/802-deoptimization/info.txt b/test/802-deoptimization/info.txt
new file mode 100644
index 0000000..104d40f
--- /dev/null
+++ b/test/802-deoptimization/info.txt
@@ -0,0 +1 @@
+Tests related to deoptimization
diff --git a/test/802-deoptimization/smali/catch_handler_on_entry.smali b/test/802-deoptimization/smali/catch_handler_on_entry.smali
new file mode 100644
index 0000000..836101e
--- /dev/null
+++ b/test/802-deoptimization/smali/catch_handler_on_entry.smali
@@ -0,0 +1,29 @@
+.class public LCatchHandlerOnEntry;
+
+.super Ljava/lang/Object;
+
+# Test we can execute a method starting with a catch handler (without
+# move-exception instruction). This method must be called with parameter
+# initialized to 0.
+#
+# We execute the catch handler (Label1) for the first time with p0 == 0.
+# We save its value in v0, increment p0 to 1 and invoke the helper method
+# (Label2), which starts deoptimization and throws a RuntimeException.
+# That exception is caught by the catch handler so we execute it a second time.
+# Now v0 == 1. When we invoke the helper again, it stops deoptimization and
+# returns normally, so we return v1, which the handler set to 100.
+.method public static catchHandlerOnEntry(I)I
+.registers 4
+:Label1
+       const v1, 100
+       move v0, p0
+       add-int/lit8 p0, p0, 1
+
+:Label2
+       invoke-static {v0}, LCatchHandlerOnEntryHelper;->throwExceptionDuringDeopt(I)V
+
+:Label3
+       return v1
+
+.catchall {:Label2 .. :Label3} :Label1
+.end method
diff --git a/test/802-deoptimization/src/CatchHandlerOnEntryHelper.java b/test/802-deoptimization/src/CatchHandlerOnEntryHelper.java
new file mode 100644
index 0000000..a88d31b
--- /dev/null
+++ b/test/802-deoptimization/src/CatchHandlerOnEntryHelper.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Helper class used by smali test classes.
+ */
+public class CatchHandlerOnEntryHelper {
+
+  public static void throwExceptionDuringDeopt(int i) {
+    if (i == 0) {  // Turn on method tracing via DeoptimizationController, then throw.
+      DeoptimizationController.startDeoptomization();
+      throw new RuntimeException("Test exception");
+    } else {  // Any non-zero argument: turn tracing back off and return normally.
+      DeoptimizationController.stopDeoptomization();
+    }
+  }
+}
diff --git a/test/802-deoptimization/src/DeoptimizationController.java b/test/802-deoptimization/src/DeoptimizationController.java
new file mode 100644
index 0000000..c031c07
--- /dev/null
+++ b/test/802-deoptimization/src/DeoptimizationController.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+/**
+ * Controls deoptimization using dalvik.system.VMDebug class.
+ */
+public class DeoptimizationController {
+  public static void startDeoptomization() {
+    try {
+      File tempFile;
+      try {
+        tempFile = File.createTempFile("test", ".trace");
+      } catch (IOException e) {
+        System.setProperty("java.io.tmpdir", "/sdcard");  // Retry with /sdcard as the temp dir.
+        tempFile = File.createTempFile("test", ".trace");
+      }
+      tempFile.deleteOnExit();
+      String tempFileName = tempFile.getPath();
+
+      VMDebug.startMethodTracing(tempFileName, 0, 0, false, 1000);
+      if (VMDebug.getMethodTracingMode() == 0) {  // Mode 0 is treated here as "not tracing".
+        throw new IllegalStateException("Not tracing.");
+      }
+    } catch (Exception exc) {  // Failures are only printed to stderr, never rethrown.
+      exc.printStackTrace(System.err);
+    }
+  }
+
+  public static void stopDeoptomization() {
+    try {
+      VMDebug.stopMethodTracing();
+      if (VMDebug.getMethodTracingMode() != 0) {  // Non-zero mode is treated as still tracing.
+        throw new IllegalStateException("Still tracing.");
+      }
+    } catch (Exception exc) {  // Failures are only printed to stderr, never rethrown.
+      exc.printStackTrace(System.err);
+    }
+  }
+
+  private static class VMDebug {  // Reflection-based wrapper around dalvik.system.VMDebug.
+    private static final Method startMethodTracingMethod;
+    private static final Method stopMethodTracingMethod;
+    private static final Method getMethodTracingModeMethod;
+
+    static {
+      try {
+        Class<?> c = Class.forName("dalvik.system.VMDebug");
+        startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+            Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+        stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+        getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+      } catch (Exception e) {  // Any lookup failure is rethrown as an unchecked exception.
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void startMethodTracing(String filename, int bufferSize, int flags,
+        boolean samplingEnabled, int intervalUs) throws Exception {
+      startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+          intervalUs);
+    }
+    public static void stopMethodTracing() throws Exception {
+      stopMethodTracingMethod.invoke(null);
+    }
+    public static int getMethodTracingMode() throws Exception {
+      return (int) getMethodTracingModeMethod.invoke(null);
+    }
+  }
+}
diff --git a/test/802-deoptimization/src/Main.java b/test/802-deoptimization/src/Main.java
new file mode 100644
index 0000000..c8780de
--- /dev/null
+++ b/test/802-deoptimization/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  private static final int EXPECTED_RESULT = 100;  // Constant the smali method returns in v1.
+  private static final int PARAMETER_VALUE = 0;  // Makes the helper throw on its first call.
+
+  public static void main(String[] args) throws Throwable {
+    testCatchHandlerOnEntryWithoutMoveException();
+  }
+
+  /**
+   * Tests we correctly execute a method starting with a catch handler without
+   * move-exception instruction when throwing an exception during deoptimization.
+   */
+  private static void testCatchHandlerOnEntryWithoutMoveException() throws Throwable {
+    Class<?> c = Class.forName("CatchHandlerOnEntry");  // Class is defined in smali.
+    Method m = c.getMethod("catchHandlerOnEntry", int.class);
+    Object result = m.invoke(null, new Object[]{PARAMETER_VALUE});
+    int intResult = ((Integer) result).intValue();
+    if (intResult == EXPECTED_RESULT) {
+      System.out.println("CatchHandlerOnEntryWithoutMoveException OK");  // Matches expected.txt.
+    } else {
+      System.out.println("CatchHandlerOnEntryWithoutMoveException KO: result==" + intResult);
+    }
+  }
+}
+
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index bd9941d..b1e969d 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -329,8 +329,9 @@
 TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=
 
 # Known broken tests for the optimizing compiler.
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := \
-  099-vmdebug \ # b/18098594
+TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 099-vmdebug # b/18098594
+TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 802-deoptimization # b/18547544
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \