Merge "Add conditional move support to x86 and allow GenMinMax to use it"
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 395c788..b06ebcf 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -241,7 +241,7 @@
   kArmFirst = 0,
   kArm16BitData = kArmFirst,  // DATA   [0] rd[15..0].
   kThumbAdcRR,       // adc   [0100000101] rm[5..3] rd[2..0].
-  kThumbAddRRI3,     // add(1)  [0001110] imm_3[8..6] rn[5..3] rd[2..0]*/
+  kThumbAddRRI3,     // add(1)  [0001110] imm_3[8..6] rn[5..3] rd[2..0].
   kThumbAddRI8,      // add(2)  [00110] rd[10..8] imm_8[7..0].
   kThumbAddRRR,      // add(3)  [0001100] rm[8..6] rn[5..3] rd[2..0].
   kThumbAddRRLH,     // add(4)  [01000100] H12[01] rm[5..3] rd[2..0].
@@ -326,20 +326,23 @@
   kThumb2Vaddd,      // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0].
   kThumb2Vdivs,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0].
   kThumb2Vdivd,      // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0].
+  kThumb2VmlaF64,    // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0].
   kThumb2VcvtIF,     // vcvt.F32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0].
   kThumb2VcvtID,     // vcvt.F64 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
   kThumb2VcvtFI,     // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0].
   kThumb2VcvtDI,     // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
   kThumb2VcvtFd,     // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0].
   kThumb2VcvtDF,     // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
+  kThumb2VcvtF64S32,  // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
+  kThumb2VcvtF64U32,  // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0].
   kThumb2Vsqrts,     // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
   kThumb2Vsqrtd,     // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
   kThumb2MovI8M,     // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
   kThumb2MovImm16,   // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
   kThumb2StrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
   kThumb2LdrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
-  kThumb2StrRRI8Predec,  // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0]*/
-  kThumb2LdrRRI8Predec,  // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0]*/
+  kThumb2StrRRI8Predec,  // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0].
+  kThumb2LdrRRI8Predec,  // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0].
   kThumb2Cbnz,       // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0].
   kThumb2Cbz,        // cbn rd,<label> [101100] i [1] imm5[7..3] rn[2..0].
   kThumb2AddRRI12,   // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 820b3aa..00939ec 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -457,6 +457,10 @@
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "vdivd", "!0S, !1S, !2S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VmlaF64,     0xee000b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012,
+                 "vmla", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtIF,       0xeeb80ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
@@ -481,6 +485,14 @@
                  kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtF64S32,   0xeeb80bc0,
+                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone),
+    ENCODING_MAP(kThumb2VcvtF64U32,   0xeeb80b40,
+                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsqrts,       0xeeb10ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 8af9cdd..1a9d9c5 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -141,9 +141,24 @@
     case Instruction::DOUBLE_TO_INT:
       op = kThumb2VcvtDI;
       break;
-    case Instruction::LONG_TO_DOUBLE:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src);
+    case Instruction::LONG_TO_DOUBLE: {
+      rl_src = LoadValueWide(rl_src, kFPReg);
+      src_reg = S2d(rl_src.low_reg, rl_src.high_reg);
+      rl_result = EvalLoc(rl_dest, kFPReg, true);
+      // TODO: clean up AllocTempDouble so that its result has the double bits set.
+      int tmp1 = AllocTempDouble();
+      int tmp2 = AllocTempDouble();
+
+      NewLIR2(kThumb2VcvtF64S32, tmp1 | ARM_FP_DOUBLE, (src_reg & ~ARM_FP_DOUBLE) + 1);
+      NewLIR2(kThumb2VcvtF64U32, S2d(rl_result.low_reg, rl_result.high_reg), (src_reg & ~ARM_FP_DOUBLE));
+      LoadConstantWide(tmp2, tmp2 + 1, 0x41f0000000000000LL);
+      NewLIR3(kThumb2VmlaF64, S2d(rl_result.low_reg, rl_result.high_reg), tmp1 | ARM_FP_DOUBLE,
+              tmp2 | ARM_FP_DOUBLE);
+      FreeTemp(tmp1);
+      FreeTemp(tmp2);
+      StoreValueWide(rl_dest, rl_result);
       return;
+    }
     case Instruction::FLOAT_TO_LONG:
       GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
       return;
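
The LONG_TO_DOUBLE case above replaces the pL2d runtime call with an inline VFP sequence: the low word of the long is converted with vcvt.f64.u32, the high word with vcvt.f64.s32, and the halves are recombined by vmla.f64 as high * 2^32 + low (0x41f0000000000000 is the IEEE-754 bit pattern of 2^32, loaded via LoadConstantWide). A minimal C++ sketch of the same arithmetic, with illustrative names rather than ART APIs:

    #include <cstdint>
    #include <cstdio>

    // Scalar model of the inline sequence emitted above (illustration only).
    double LongToDouble(int64_t value) {
      uint32_t lo = static_cast<uint32_t>(value);      // vcvt.f64.u32 on the low word
      int32_t hi = static_cast<int32_t>(value >> 32);  // vcvt.f64.s32 on the high word
      const double kTwoPow32 = 4294967296.0;           // bit pattern 0x41f0000000000000
      return static_cast<double>(hi) * kTwoPow32 + static_cast<double>(lo);  // vmla.f64
    }

    int main() {
      std::printf("%.1f\n", LongToDouble(-1234567890123LL));  // prints -1234567890123.0
    }
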
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index d80ae3b..7591041 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -255,7 +255,7 @@
   "ror"};
 
 /* Decode and print a ARM register name */
-static char* DecodeRegList(int opcode, int vector, char* buf) {
+static char* DecodeRegList(int opcode, int vector, char* buf, size_t buf_size) {
   int i;
   bool printed = false;
   buf[0] = 0;
@@ -268,20 +268,20 @@
         reg_id = r15pc;
       }
       if (printed) {
-        sprintf(buf + strlen(buf), ", r%d", reg_id);
+        snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id);
       } else {
         printed = true;
-        sprintf(buf, "r%d", reg_id);
+        snprintf(buf, buf_size, "r%d", reg_id);
       }
     }
   }
   return buf;
 }
 
-static char*  DecodeFPCSRegList(int count, int base, char* buf) {
-  sprintf(buf, "s%d", base);
+static char*  DecodeFPCSRegList(int count, int base, char* buf, size_t buf_size) {
+  snprintf(buf, buf_size, "s%d", base);
   for (int i = 1; i < count; i++) {
-    sprintf(buf + strlen(buf), ", s%d", base + i);
+    snprintf(buf + strlen(buf), buf_size - strlen(buf), ", s%d", base + i);
   }
   return buf;
 }
@@ -333,7 +333,7 @@
          switch (*fmt++) {
            case 'H':
              if (operand != 0) {
-               sprintf(tbuf, ", %s %d", shift_names[operand & 0x3], operand >> 2);
+               snprintf(tbuf, arraysize(tbuf), ", %s %d", shift_names[operand & 0x3], operand >> 2);
              } else {
                strcpy(tbuf, "");
              }
@@ -373,41 +373,41 @@
              break;
            case 'n':
              operand = ~ExpandImmediate(operand);
-             sprintf(tbuf, "%d [%#x]", operand, operand);
+             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
              break;
            case 'm':
              operand = ExpandImmediate(operand);
-             sprintf(tbuf, "%d [%#x]", operand, operand);
+             snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
              break;
            case 's':
-             sprintf(tbuf, "s%d", operand & ARM_FP_REG_MASK);
+             snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK);
              break;
            case 'S':
-             sprintf(tbuf, "d%d", (operand & ARM_FP_REG_MASK) >> 1);
+             snprintf(tbuf, arraysize(tbuf), "d%d", (operand & ARM_FP_REG_MASK) >> 1);
              break;
            case 'h':
-             sprintf(tbuf, "%04x", operand);
+             snprintf(tbuf, arraysize(tbuf), "%04x", operand);
              break;
            case 'M':
            case 'd':
-             sprintf(tbuf, "%d", operand);
+             snprintf(tbuf, arraysize(tbuf), "%d", operand);
              break;
            case 'C':
              DCHECK_LT(operand, static_cast<int>(
                  sizeof(core_reg_names)/sizeof(core_reg_names[0])));
-             sprintf(tbuf, "%s", core_reg_names[operand]);
+             snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]);
              break;
            case 'E':
-             sprintf(tbuf, "%d", operand*4);
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
              break;
            case 'F':
-             sprintf(tbuf, "%d", operand*2);
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
              break;
            case 'c':
              strcpy(tbuf, cc_names[operand]);
              break;
            case 't':
-             sprintf(tbuf, "0x%08x (L%p)",
+             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
                  reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 +
                  (operand << 1),
                  lir->target);
@@ -419,7 +419,7 @@
                  (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) &
                  ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
                  0xfffffffc;
-             sprintf(tbuf, "%p", reinterpret_cast<void *>(target));
+             snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void *>(target));
              break;
           }
 
@@ -428,13 +428,13 @@
              strcpy(tbuf, "see above");
              break;
            case 'R':
-             DecodeRegList(lir->opcode, operand, tbuf);
+             DecodeRegList(lir->opcode, operand, tbuf, arraysize(tbuf));
              break;
            case 'P':
-             DecodeFPCSRegList(operand, 16, tbuf);
+             DecodeFPCSRegList(operand, 16, tbuf, arraysize(tbuf));
              break;
            case 'Q':
-             DecodeFPCSRegList(operand, 0, tbuf);
+             DecodeFPCSRegList(operand, 0, tbuf, arraysize(tbuf));
              break;
            default:
              strcpy(tbuf, "DecodeError1");
@@ -461,7 +461,7 @@
 
     for (i = 0; i < kArmRegEnd; i++) {
       if (mask & (1ULL << i)) {
-        sprintf(num, "%d ", i);
+        snprintf(num, arraysize(num), "%d ", i);
         strcat(buf, num);
       }
     }
@@ -475,8 +475,9 @@
 
     /* Memory bits */
     if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
-              DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
+      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
+               DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
+               DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
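
Every sprintf-to-snprintf conversion in this file uses the same append pattern: the remaining capacity (the full size for the first write, size minus strlen for appends) is passed explicitly, so a long operand list can no longer overrun the fixed buffer; arraysize() is the compile-time array-length helper already used in this tree. A small standalone C++ sketch of the pattern, with a deliberately tiny made-up buffer:

    #include <cstdio>
    #include <cstring>

    int main() {
      char buf[16];
      buf[0] = '\0';
      // Append register names without ever writing past the end of buf.
      for (int reg = 0; reg < 8; ++reg) {
        size_t used = std::strlen(buf);
        std::snprintf(buf + used, sizeof(buf) - used, "%sr%d", used == 0 ? "" : ", ", reg);
      }
      std::puts(buf);  // prints "r0, r1, r2, r3," -- later names are truncated, not overflowed
    }
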
diff --git a/compiler/dex/quick/dex_file_to_method_inliner_map.h b/compiler/dex/quick/dex_file_to_method_inliner_map.h
index 6d5b889..215dc12 100644
--- a/compiler/dex/quick/dex_file_to_method_inliner_map.h
+++ b/compiler/dex/quick/dex_file_to_method_inliner_map.h
@@ -40,7 +40,9 @@
     DexFileToMethodInlinerMap();
     ~DexFileToMethodInlinerMap();
 
-    DexFileMethodInliner* GetMethodInliner(const DexFile* dex_file) LOCKS_EXCLUDED(lock_);
+    DexFileMethodInliner* GetMethodInliner(const DexFile* dex_file) NO_THREAD_SAFETY_ANALYSIS;
+        // TODO: There is an irregular non-scoped use of locks that defeats annotalysis with -O0.
+        // Fix the NO_THREAD_SAFETY_ANALYSIS when this works and add the appropriate LOCKS_EXCLUDED.
 
   private:
     ReaderWriterMutex lock_;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index a426cc7..6b4cbd4 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -330,21 +330,22 @@
 void Mir2Lir::GenSput(uint32_t field_idx, RegLocation rl_src, bool is_long_or_double,
                       bool is_object) {
   int field_offset;
-  int ssb_index;
+  int storage_index;
   bool is_volatile;
   bool is_referrers_class;
+  bool is_initialized;
   bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo(
       field_idx, mir_graph_->GetCurrentDexCompilationUnit(), true,
-      &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
+      &field_offset, &storage_index, &is_referrers_class, &is_volatile, &is_initialized);
   if (fast_path && !SLOW_FIELD_PATH) {
     DCHECK_GE(field_offset, 0);
-    int rBase;
+    int r_base;
     if (is_referrers_class) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method  = LoadCurrMethod();
-      rBase = AllocTemp();
+      r_base = AllocTemp();
       LoadWordDisp(rl_method.low_reg,
-                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(), rBase);
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
       if (IsTemp(rl_method.low_reg)) {
         FreeTemp(rl_method.low_reg);
       }
@@ -352,33 +353,44 @@
       // Medium path, static storage base in a different class which requires checks that the other
       // class is initialized.
       // TODO: remove initialized check now that we are initializing classes in the compiler driver.
-      DCHECK_GE(ssb_index, 0);
+      DCHECK_GE(storage_index, 0);
       // May do runtime call so everything to home locations.
       FlushAllRegs();
       // Using fixed register to sync with possible call to runtime support.
       int r_method = TargetReg(kArg1);
       LockTemp(r_method);
       LoadCurrMethodDirect(r_method);
-      rBase = TargetReg(kArg0);
-      LockTemp(rBase);
+      r_base = TargetReg(kArg0);
+      LockTemp(r_base);
       LoadWordDisp(r_method,
-                   mirror::ArtMethod::DexCacheInitializedStaticStorageOffset().Int32Value(),
-                   rBase);
-      LoadWordDisp(rBase,
-                   mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
-                   sizeof(int32_t*) * ssb_index, rBase);
-      // rBase now points at appropriate static storage base (Class*)
-      // or NULL if not initialized. Check for NULL and call helper if NULL.
-      // TUNING: fast path should fall through
-      LIR* branch_over = OpCmpImmBranch(kCondNe, rBase, 0, NULL);
-      LoadConstant(TargetReg(kArg0), ssb_index);
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), ssb_index, true);
-      if (cu_->instruction_set == kMips) {
-        // For Arm, kRet0 = kArg0 = rBase, for Mips, we need to copy
-        OpRegCopy(rBase, TargetReg(kRet0));
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   r_base);
+      LoadWordDisp(r_base, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+                   sizeof(int32_t*) * storage_index, r_base);
+      // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
+      if (!is_initialized) {
+        // Check whether r_base is NULL or points to a class that is not yet initialized.
+        // TUNING: fast path should fall through
+        LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
+        int r_tmp = TargetReg(kArg2);
+        LockTemp(r_tmp);
+        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
+        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
+        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
+                                                 NULL);
+
+        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
+        unresolved_branch->target = unresolved_target;
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
+                             true);
+        // Copy helper's result into r_base, a no-op on all but MIPS.
+        OpRegCopy(r_base, TargetReg(kRet0));
+
+        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
+        initialized_branch->target = initialized_target;
+
+        FreeTemp(r_tmp);
       }
-      LIR* skip_target = NewLIR0(kPseudoTargetLabel);
-      branch_over->target = skip_target;
       FreeTemp(r_method);
     }
     // rBase now holds static storage base
@@ -391,18 +403,18 @@
       GenMemBarrier(kStoreStore);
     }
     if (is_long_or_double) {
-      StoreBaseDispWide(rBase, field_offset, rl_src.low_reg,
+      StoreBaseDispWide(r_base, field_offset, rl_src.low_reg,
                         rl_src.high_reg);
     } else {
-      StoreWordDisp(rBase, field_offset, rl_src.low_reg);
+      StoreWordDisp(r_base, field_offset, rl_src.low_reg);
     }
     if (is_volatile) {
       GenMemBarrier(kStoreLoad);
     }
     if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
-      MarkGCCard(rl_src.low_reg, rBase);
+      MarkGCCard(rl_src.low_reg, r_base);
     }
-    FreeTemp(rBase);
+    FreeTemp(r_base);
   } else {
     FlushAllRegs();  // Everything to home locations
     ThreadOffset setter_offset =
@@ -416,64 +428,77 @@
 void Mir2Lir::GenSget(uint32_t field_idx, RegLocation rl_dest,
                       bool is_long_or_double, bool is_object) {
   int field_offset;
-  int ssb_index;
+  int storage_index;
   bool is_volatile;
   bool is_referrers_class;
+  bool is_initialized;
   bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo(
       field_idx, mir_graph_->GetCurrentDexCompilationUnit(), false,
-      &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
+      &field_offset, &storage_index, &is_referrers_class, &is_volatile, &is_initialized);
   if (fast_path && !SLOW_FIELD_PATH) {
     DCHECK_GE(field_offset, 0);
-    int rBase;
+    int r_base;
     if (is_referrers_class) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method  = LoadCurrMethod();
-      rBase = AllocTemp();
+      r_base = AllocTemp();
       LoadWordDisp(rl_method.low_reg,
-                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(), rBase);
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
     } else {
       // Medium path, static storage base in a different class which requires checks that the other
       // class is initialized
-      // TODO: remove initialized check now that we are initializing classes in the compiler driver.
-      DCHECK_GE(ssb_index, 0);
+      DCHECK_GE(storage_index, 0);
       // May do runtime call so everything to home locations.
       FlushAllRegs();
       // Using fixed register to sync with possible call to runtime support.
       int r_method = TargetReg(kArg1);
       LockTemp(r_method);
       LoadCurrMethodDirect(r_method);
-      rBase = TargetReg(kArg0);
-      LockTemp(rBase);
+      r_base = TargetReg(kArg0);
+      LockTemp(r_base);
       LoadWordDisp(r_method,
-                   mirror::ArtMethod::DexCacheInitializedStaticStorageOffset().Int32Value(),
-                   rBase);
-      LoadWordDisp(rBase, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
-                   sizeof(int32_t*) * ssb_index, rBase);
-      // rBase now points at appropriate static storage base (Class*)
-      // or NULL if not initialized. Check for NULL and call helper if NULL.
-      // TUNING: fast path should fall through
-      LIR* branch_over = OpCmpImmBranch(kCondNe, rBase, 0, NULL);
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), ssb_index, true);
-      if (cu_->instruction_set == kMips) {
-        // For Arm, kRet0 = kArg0 = rBase, for Mips, we need to copy
-        OpRegCopy(rBase, TargetReg(kRet0));
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   r_base);
+      LoadWordDisp(r_base, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+                   sizeof(int32_t*) * storage_index, r_base);
+      // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
+      if (!is_initialized) {
+        // Check whether r_base is NULL or points to a class that is not yet initialized.
+        // TUNING: fast path should fall through
+        LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
+        int r_tmp = TargetReg(kArg2);
+        LockTemp(r_tmp);
+        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
+        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
+        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
+                                                 NULL);
+
+        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
+        unresolved_branch->target = unresolved_target;
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
+                             true);
+        // Copy helper's result into r_base, a no-op on all but MIPS.
+        OpRegCopy(r_base, TargetReg(kRet0));
+
+        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
+        initialized_branch->target = initialized_target;
+
+        FreeTemp(r_tmp);
       }
-      LIR* skip_target = NewLIR0(kPseudoTargetLabel);
-      branch_over->target = skip_target;
       FreeTemp(r_method);
     }
-    // rBase now holds static storage base
+    // r_base now holds static storage base
     RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
     if (is_volatile) {
       GenMemBarrier(kLoadLoad);
     }
     if (is_long_or_double) {
-      LoadBaseDispWide(rBase, field_offset, rl_result.low_reg,
+      LoadBaseDispWide(r_base, field_offset, rl_result.low_reg,
                        rl_result.high_reg, INVALID_SREG);
     } else {
-      LoadWordDisp(rBase, field_offset, rl_result.low_reg);
+      LoadWordDisp(r_base, field_offset, rl_result.low_reg);
     }
-    FreeTemp(rBase);
+    FreeTemp(r_base);
     if (is_long_or_double) {
       StoreValueWide(rl_dest, rl_result);
     } else {
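
With the initialized-static-storage (SSB) array removed, GenSput and GenSget above load the storage class straight from the dex cache's resolved-types array and, when ComputeStaticFieldInfo could not prove the class initialized, emit an inline status check before falling back to pInitializeStaticStorage. A rough C++ equivalent of the generated fast/slow path, using stand-in types and enum values rather than the real ART definitions:

    #include <cstdint>

    // Stand-ins for illustration; not the real ART types or status values.
    enum ClassStatus { kStatusResolved = 1, kStatusInitialized = 2 };
    struct Class { ClassStatus status; };

    static Class gInitializedClass = { kStatusInitialized };

    // Models the pInitializeStaticStorage runtime entrypoint.
    Class* InitializeStaticStorage(uint32_t storage_index) {
      (void)storage_index;
      return &gInitializedClass;
    }

    // Shape of the code emitted when is_initialized is false.
    Class* GetStorageBase(Class** resolved_types, uint32_t storage_index) {
      Class* base = resolved_types[storage_index];
      if (base == nullptr || base->status < kStatusInitialized) {
        base = InitializeStaticStorage(storage_index);  // slow path: resolve and initialize
      }
      return base;  // points at the initialized storage class; the field access follows
    }

    int main() {
      Class* resolved_types[4] = {};
      return GetStorageBase(resolved_types, 2) == &gInitializedClass ? 0 : 1;
    }
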
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 869706f..1aee06c 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -180,34 +180,35 @@
              }
              break;
            case 's':
-             sprintf(tbuf, "$f%d", operand & MIPS_FP_REG_MASK);
+             snprintf(tbuf, arraysize(tbuf), "$f%d", operand & MIPS_FP_REG_MASK);
              break;
            case 'S':
              DCHECK_EQ(((operand & MIPS_FP_REG_MASK) & 1), 0);
-             sprintf(tbuf, "$f%d", operand & MIPS_FP_REG_MASK);
+             snprintf(tbuf, arraysize(tbuf), "$f%d", operand & MIPS_FP_REG_MASK);
              break;
            case 'h':
-             sprintf(tbuf, "%04x", operand);
+             snprintf(tbuf, arraysize(tbuf), "%04x", operand);
              break;
            case 'M':
            case 'd':
-             sprintf(tbuf, "%d", operand);
+             snprintf(tbuf, arraysize(tbuf), "%d", operand);
              break;
            case 'D':
-             sprintf(tbuf, "%d", operand+1);
+             snprintf(tbuf, arraysize(tbuf), "%d", operand+1);
              break;
            case 'E':
-             sprintf(tbuf, "%d", operand*4);
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
              break;
            case 'F':
-             sprintf(tbuf, "%d", operand*2);
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
              break;
            case 't':
-             sprintf(tbuf, "0x%08x (L%p)", reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 +
-                     (operand << 2), lir->target);
+             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
+                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 2),
+                      lir->target);
              break;
            case 'T':
-             sprintf(tbuf, "0x%08x", operand << 2);
+             snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
              break;
            case 'u': {
              int offset_1 = lir->operands[0];
@@ -215,7 +216,7 @@
              uintptr_t target =
                  (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & ~3) +
                  (offset_1 << 21 >> 9) + (offset_2 << 1)) & 0xfffffffc;
-             sprintf(tbuf, "%p", reinterpret_cast<void*>(target));
+             snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void*>(target));
              break;
           }
 
@@ -257,7 +258,7 @@
 
     for (i = 0; i < kMipsRegEnd; i++) {
       if (mask & (1ULL << i)) {
-        sprintf(num, "%d ", i);
+        snprintf(num, arraysize(num), "%d ", i);
         strcat(buf, num);
       }
     }
@@ -270,8 +271,9 @@
     }
     /* Memory bits */
     if (mips_lir && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info),
-              DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : "");
+      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
+               DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info),
+               DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 0b8c07e..b281063 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -270,7 +270,7 @@
 
     for (i = 0; i < kX86RegEnd; i++) {
       if (mask & (1ULL << i)) {
-        sprintf(num, "%d ", i);
+        snprintf(num, arraysize(num), "%d ", i);
         strcat(buf, num);
       }
     }
@@ -280,8 +280,9 @@
     }
     /* Memory bits */
     if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
-              (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
+      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
+               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
+               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9cffb3c..5edc8b6 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -992,14 +992,16 @@
 }
 
 bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                            bool is_put, int* field_offset, int* ssb_index,
-                                            bool* is_referrers_class, bool* is_volatile) {
+                                            bool is_put, int* field_offset, int* storage_index,
+                                            bool* is_referrers_class, bool* is_volatile,
+                                            bool* is_initialized) {
   ScopedObjectAccess soa(Thread::Current());
   // Conservative defaults.
   *field_offset = -1;
-  *ssb_index = -1;
+  *storage_index = -1;
   *is_referrers_class = false;
   *is_volatile = true;
+  *is_initialized = false;
   // Try to resolve field and ignore if an Incompatible Class Change Error (ie isn't static).
   mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx);
   if (resolved_field != NULL && resolved_field->IsStatic()) {
@@ -1010,6 +1012,7 @@
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
       if (fields_class == referrer_class) {
         *is_referrers_class = true;  // implies no worrying about class initialization
+        *is_initialized = true;
         *field_offset = resolved_field->GetOffset().Int32Value();
         *is_volatile = resolved_field->IsVolatile();
         stats_->ResolvedLocalStaticField();
@@ -1034,17 +1037,19 @@
         }
         bool is_write_to_final_from_wrong_class = is_put && resolved_field->IsFinal();
         if (access_ok && !is_write_to_final_from_wrong_class) {
-          // We have the resolved field, we must make it into a ssbIndex for the referrer
-          // in its static storage base (which may fail if it doesn't have a slot for it)
+          // We have the resolved field; we must make it into an index for the referrer
+          // in its static storage (which may fail if it doesn't have a slot for it).
           // TODO: for images we can elide the static storage base null check
           // if we know there's a non-null entry in the image
           mirror::DexCache* dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
           if (fields_class->GetDexCache() == dex_cache) {
             // common case where the dex cache of both the referrer and the field are the same,
             // no need to search the dex file
-            *ssb_index = fields_class->GetDexTypeIndex();
+            *storage_index = fields_class->GetDexTypeIndex();
             *field_offset = resolved_field->GetOffset().Int32Value();
             *is_volatile = resolved_field->IsVolatile();
+            *is_initialized = fields_class->IsInitialized() &&
+                CanAssumeTypeIsPresentInDexCache(*mUnit->GetDexFile(), *storage_index);
             stats_->ResolvedStaticField();
             return true;
           }
@@ -1057,9 +1062,11 @@
                mUnit->GetDexFile()->FindTypeId(mUnit->GetDexFile()->GetIndexForStringId(*string_id));
             if (type_id != NULL) {
               // medium path, needs check of static storage base being initialized
-              *ssb_index = mUnit->GetDexFile()->GetIndexForTypeId(*type_id);
+              *storage_index = mUnit->GetDexFile()->GetIndexForTypeId(*type_id);
               *field_offset = resolved_field->GetOffset().Int32Value();
               *is_volatile = resolved_field->IsVolatile();
+              *is_initialized = fields_class->IsInitialized() &&
+                  CanAssumeTypeIsPresentInDexCache(*mUnit->GetDexFile(), *storage_index);
               stats_->ResolvedStaticField();
               return true;
             }
@@ -2184,11 +2191,6 @@
         }
         soa.Self()->AssertNoPendingException();
       }
-      // If successfully initialized place in SSB array.
-      if (klass->IsInitialized()) {
-        int32_t ssb_index = klass->GetDexTypeIndex();
-        klass->GetDexCache()->GetInitializedStaticStorage()->Set(ssb_index, klass.get());
-      }
     }
     // Record the final class status if necessary.
     ClassReference ref(manager->GetDexFile(), class_def_index);
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index f4cc84d..9e31624 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -172,8 +172,7 @@
 
   // Callbacks from compiler to see what runtime checks must be generated.
 
-  bool CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx)
-      LOCKS_EXCLUDED(Locks::mutator_lock_);
+  bool CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx);
 
   bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
@@ -198,8 +197,8 @@
   // Can we fastpath static field access? Computes field's offset, volatility and whether the
   // field is within the referrer (which can avoid checking class initialization).
   bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
-                              int* field_offset, int* ssb_index,
-                              bool* is_referrers_class, bool* is_volatile)
+                              int* field_offset, int* storage_index,
+                              bool* is_referrers_class, bool* is_volatile, bool* is_initialized)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Can we fastpath a interface, super class or virtual method call? Computes method's vtable
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 02654ad..556dec2 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -316,7 +316,6 @@
       Class* klass = dex_cache->GetResolvedType(i);
       if (klass != NULL && !IsImageClass(klass)) {
         dex_cache->SetResolvedType(i, NULL);
-        dex_cache->GetInitializedStaticStorage()->Set(i, NULL);
       }
     }
     for (size_t i = 0; i < dex_cache->NumResolvedMethods(); i++) {
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index b206a25..6423cd7 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -199,8 +199,6 @@
   //----------------------------------------------------------------------------
   llvm::Value* EmitLoadDexCacheAddr(art::MemberOffset dex_cache_offset);
 
-  llvm::Value* EmitLoadDexCacheStaticStorageFieldAddr(uint32_t type_idx);
-
   llvm::Value* EmitLoadDexCacheResolvedTypeFieldAddr(uint32_t type_idx);
 
   llvm::Value* EmitLoadDexCacheResolvedMethodFieldAddr(uint32_t method_idx);
@@ -287,8 +285,6 @@
 
   llvm::Value* Expand_LoadDeclaringClassSSB(llvm::Value* method_object_addr);
 
-  llvm::Value* Expand_LoadClassSSBFromDexCache(llvm::Value* type_idx_value);
-
   llvm::Value*
   Expand_GetSDCalleeMethodObjAddrFast(llvm::Value* callee_method_idx_value);
 
@@ -720,16 +716,6 @@
 }
 
 llvm::Value*
-GBCExpanderPass::EmitLoadDexCacheStaticStorageFieldAddr(uint32_t type_idx) {
-  llvm::Value* static_storage_dex_cache_addr =
-    EmitLoadDexCacheAddr(art::mirror::ArtMethod::DexCacheInitializedStaticStorageOffset());
-
-  llvm::Value* type_idx_value = irb_.getPtrEquivInt(type_idx);
-
-  return EmitArrayGEP(static_storage_dex_cache_addr, type_idx_value, kObject);
-}
-
-llvm::Value*
 GBCExpanderPass::EmitLoadDexCacheResolvedTypeFieldAddr(uint32_t type_idx) {
   llvm::Value* resolved_type_dex_cache_addr =
     EmitLoadDexCacheAddr(art::mirror::ArtMethod::DexCacheResolvedTypesOffset());
@@ -1213,17 +1199,6 @@
 }
 
 llvm::Value*
-GBCExpanderPass::Expand_LoadClassSSBFromDexCache(llvm::Value* type_idx_value) {
-  uint32_t type_idx =
-    llvm::cast<llvm::ConstantInt>(type_idx_value)->getZExtValue();
-
-  llvm::Value* storage_field_addr =
-    EmitLoadDexCacheStaticStorageFieldAddr(type_idx);
-
-  return irb_.CreateLoad(storage_field_addr, kTBAARuntimeInfo);
-}
-
-llvm::Value*
 GBCExpanderPass::Expand_GetSDCalleeMethodObjAddrFast(llvm::Value* callee_method_idx_value) {
   uint32_t callee_method_idx =
     llvm::cast<llvm::ConstantInt>(callee_method_idx_value)->getZExtValue();
@@ -1837,21 +1812,31 @@
   llvm::BasicBlock* block_load_static =
     CreateBasicBlockWithDexPC(dex_pc, "load_static");
 
+  llvm::BasicBlock* block_check_init = CreateBasicBlockWithDexPC(dex_pc, "init");
   llvm::BasicBlock* block_cont = CreateBasicBlockWithDexPC(dex_pc, "cont");
 
   // Load static storage from dex cache
-  llvm::Value* storage_field_addr =
-    EmitLoadDexCacheStaticStorageFieldAddr(type_idx);
+  llvm::Value* storage_field_addr = EmitLoadDexCacheResolvedTypeFieldAddr(type_idx);
 
   llvm::Value* storage_object_addr = irb_.CreateLoad(storage_field_addr, kTBAARuntimeInfo);
 
-  llvm::BasicBlock* block_original = irb_.GetInsertBlock();
+  // Test: Is the class resolved?
+  llvm::Value* equal_null = irb_.CreateICmpEQ(storage_object_addr, irb_.getJNull());
 
-  // Test: Is the static storage of this class initialized?
-  llvm::Value* equal_null =
-    irb_.CreateICmpEQ(storage_object_addr, irb_.getJNull());
+  irb_.CreateCondBr(equal_null, block_load_static, block_check_init, kUnlikely);
 
-  irb_.CreateCondBr(equal_null, block_load_static, block_cont, kUnlikely);
+  // storage_object_addr != null, so check if it's initialized.
+  irb_.SetInsertPoint(block_check_init);
+
+  llvm::Value* class_status =
+      irb_.LoadFromObjectOffset(storage_object_addr,
+                                art::mirror::Class::StatusOffset().Int32Value(),
+                                irb_.getJIntTy(), kTBAAHeapInstance);
+
+  llvm::Value* is_not_initialized =
+      irb_.CreateICmpULT(class_status, irb_.getInt32(art::mirror::Class::kStatusInitialized));
+
+  irb_.CreateCondBr(is_not_initialized, block_load_static, block_cont, kUnlikely);
 
   // Failback routine to load the class object
   irb_.SetInsertPoint(block_load_static);
@@ -1880,9 +1865,8 @@
 
   llvm::PHINode* phi = irb_.CreatePHI(irb_.getJObjectTy(), 2);
 
-  phi->addIncoming(storage_object_addr, block_original);
+  phi->addIncoming(storage_object_addr, block_check_init);
   phi->addIncoming(loaded_storage_object_addr, block_after_load_static);
-
   return phi;
 }
 
@@ -1895,10 +1879,11 @@
   int ssb_index;
   bool is_referrers_class;
   bool is_volatile;
+  bool is_initialized;
 
   bool is_fast_path = driver_->ComputeStaticFieldInfo(
     field_idx, dex_compilation_unit_, false,
-    &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
+    &field_offset, &ssb_index, &is_referrers_class, &is_volatile, &is_initialized);
 
   llvm::Value* static_field_value;
 
@@ -1979,10 +1964,11 @@
   int ssb_index;
   bool is_referrers_class;
   bool is_volatile;
+  bool is_initialized;
 
   bool is_fast_path = driver_->ComputeStaticFieldInfo(
     field_idx, dex_compilation_unit_, true,
-    &field_offset, &ssb_index, &is_referrers_class, &is_volatile);
+    &field_offset, &ssb_index, &is_referrers_class, &is_volatile, &is_initialized);
 
   if (!is_fast_path) {
     llvm::Function* runtime_func;
@@ -3360,9 +3346,6 @@
     case IntrinsicHelper::LoadDeclaringClassSSB: {
       return Expand_LoadDeclaringClassSSB(call_inst.getArgOperand(0));
     }
-    case IntrinsicHelper::LoadClassSSBFromDexCache: {
-      return Expand_LoadClassSSBFromDexCache(call_inst.getArgOperand(0));
-    }
     case IntrinsicHelper::InitializeAndLoadClassSSB: {
       return ExpandToRuntime(InitializeStaticStorage, call_inst);
     }
diff --git a/compiler/llvm/intrinsic_func_list.def b/compiler/llvm/intrinsic_func_list.def
index 92537ba..887a626 100644
--- a/compiler/llvm/intrinsic_func_list.def
+++ b/compiler/llvm/intrinsic_func_list.def
@@ -863,13 +863,6 @@
                           kJavaObjectTy,
                           _EXPAND_ARG1(kJavaMethodTy))
 
-// JavaObject* art_portable_load_class_ssb_from_dex_cache(uint32_t type_idx)
-_EVAL_DEF_INTRINSICS_FUNC(LoadClassSSBFromDexCache,
-                          art_portable_load_class_ssb_from_dex_cache,
-                          kAttrReadOnly | kAttrNoThrow,
-                          kJavaObjectTy,
-                          _EXPAND_ARG1(kInt32ConstantTy))
-
 // JavaObject* art_portable_init_and_load_class_ssb(uint32_t type_idx,
 //                                              Method* referrer,
 //                                              Thread* thread)
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 71f70c4..68626f6 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -683,7 +683,7 @@
         uint32_t coproc = (instr >> 8) & 0xF;
         uint32_t op4 = (instr >> 4) & 0x1;
 
-        if (coproc == 10 || coproc == 11) {   // 101x
+        if (coproc == 0xA || coproc == 0xB) {   // 101x
           if (op3 < 0x20 && (op3 & ~5) != 0) {     // 0xxxxx and not 000x0x
             // Extension register load/store instructions
             // |1111|110|00000|0000|1111|110|0|00000000|
@@ -708,6 +708,11 @@
                 opcode << (L == 1 ? "vldr" : "vstr");
                 args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
                      << (imm8 << 2) << "]";
+                if (Rn.r == 15 && U == 1) {
+                  intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
+                  lit_adr = RoundDown(lit_adr, 4) + 4 + (imm8 << 2);
+                  args << StringPrintf("  ; 0x%llx", *reinterpret_cast<int64_t*>(lit_adr));
+                }
               } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
                 opcode << (L == 1 ? "vpop" : "vpush");
                 args << FpRegisterRange(instr);
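
The disassembler change above annotates PC-relative vldr forms (Rn == r15 with U == 1) with the value of the 64-bit literal they load; the literal lives at the instruction address rounded down to 4, plus 4, plus the scaled 8-bit immediate. A self-contained C++ sketch of just that address computation, with made-up addresses:

    #include <cstdint>
    #include <cstdio>

    // Mirrors RoundDown(lit_adr, 4) + 4 + (imm8 << 2) from the hunk above.
    uintptr_t LiteralAddress(uintptr_t instr_addr, uint32_t imm8) {
      uintptr_t base = (instr_addr & ~static_cast<uintptr_t>(3)) + 4;
      return base + (imm8 << 2);
    }

    int main() {
      // A vldr at 0x1002 with imm8 == 3 reads the 8-byte literal at 0x1010.
      std::printf("0x%lx\n", static_cast<unsigned long>(LiteralAddress(0x1002, 3)));
    }
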
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 2aa6716..d32f998 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -32,8 +32,10 @@
 .endm
 
 .macro GENERATE_ALL_ALLOC_ENTRYPOINTS
-GENERATE_ALLOC_ENTRYPOINTS
-GENERATE_ALLOC_ENTRYPOINTS _instrumented, Instrumented
+GENERATE_ALLOC_ENTRYPOINTS _dlmalloc, DlMalloc
+GENERATE_ALLOC_ENTRYPOINTS _dlmalloc_instrumented, DlMallocInstrumented
+GENERATE_ALLOC_ENTRYPOINTS _rosalloc, RosAlloc
+GENERATE_ALLOC_ENTRYPOINTS _rosalloc_instrumented, RosAllocInstrumented
 GENERATE_ALLOC_ENTRYPOINTS _bump_pointer, BumpPointer
 GENERATE_ALLOC_ENTRYPOINTS _bump_pointer_instrumented, BumpPointerInstrumented
 GENERATE_ALLOC_ENTRYPOINTS _tlab, TLAB
diff --git a/runtime/arch/quick_alloc_entrypoints.cc b/runtime/arch/quick_alloc_entrypoints.cc
index 4cdb3f2..457c73a 100644
--- a/runtime/arch/quick_alloc_entrypoints.cc
+++ b/runtime/arch/quick_alloc_entrypoints.cc
@@ -51,13 +51,13 @@
 namespace art {
 
 // Generate the entrypoint functions.
-GENERATE_ENTRYPOINTS();
+GENERATE_ENTRYPOINTS(_dlmalloc);
+GENERATE_ENTRYPOINTS(_rosalloc);
 GENERATE_ENTRYPOINTS(_bump_pointer);
 GENERATE_ENTRYPOINTS(_tlab);
 
 static bool entry_points_instrumented = false;
-static gc::AllocatorType entry_points_allocator = kMovingCollector ?
-    gc::kAllocatorTypeBumpPointer : gc::kAllocatorTypeFreeList;
+static gc::AllocatorType entry_points_allocator = gc::kAllocatorTypeDlMalloc;
 
 void SetQuickAllocEntryPointsAllocator(gc::AllocatorType allocator) {
   entry_points_allocator = allocator;
@@ -69,15 +69,21 @@
 
 void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
   switch (entry_points_allocator) {
-    case gc::kAllocatorTypeFreeList: {
-      SetQuickAllocEntryPoints(qpoints, entry_points_instrumented);
+    case gc::kAllocatorTypeDlMalloc: {
+      SetQuickAllocEntryPoints_dlmalloc(qpoints, entry_points_instrumented);
+      break;
+    }
+    case gc::kAllocatorTypeRosAlloc: {
+      SetQuickAllocEntryPoints_rosalloc(qpoints, entry_points_instrumented);
       break;
     }
     case gc::kAllocatorTypeBumpPointer: {
+      CHECK(kMovingCollector);
       SetQuickAllocEntryPoints_bump_pointer(qpoints, entry_points_instrumented);
       break;
     }
     case gc::kAllocatorTypeTLAB: {
+      CHECK(kMovingCollector);
       SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
       break;
     }
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index e9bbf91..06c7b53 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -39,7 +39,7 @@
 #define STRING_DATA_OFFSET 12
 
 // Offsets within java.lang.Method.
-#define METHOD_DEX_CACHE_METHODS_OFFSET 16
-#define METHOD_CODE_OFFSET 40
+#define METHOD_DEX_CACHE_METHODS_OFFSET 12
+#define METHOD_CODE_OFFSET 36
 
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
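
Both offsets shrink by 4 (16 -> 12 and 40 -> 36), consistent with this change dropping the 32-bit dex_cache_initialized_static_storage_ reference from ArtMethod (see the class_linker_test.cc hunk below): every reference field after it, and the code pointer, moves down by one word.
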
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0b728a0..131ebf8 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1242,15 +1242,8 @@
   if (fields.get() == NULL) {
     return NULL;
   }
-  SirtRef<mirror::ObjectArray<mirror::StaticStorageBase> >
-      initialized_static_storage(self,
-                          AllocObjectArray<mirror::StaticStorageBase>(self, dex_file.NumTypeIds()));
-  if (initialized_static_storage.get() == NULL) {
-    return NULL;
-  }
-
   dex_cache->Init(&dex_file, location.get(), strings.get(), types.get(), methods.get(),
-                  fields.get(), initialized_static_storage.get());
+                  fields.get());
   return dex_cache.get();
 }
 
@@ -1905,7 +1898,6 @@
   dst->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
   dst->SetDexCacheResolvedMethods(klass->GetDexCache()->GetResolvedMethods());
   dst->SetDexCacheResolvedTypes(klass->GetDexCache()->GetResolvedTypes());
-  dst->SetDexCacheInitializedStaticStorage(klass->GetDexCache()->GetInitializedStaticStorage());
 
   uint32_t access_flags = it.GetMemberAccessFlags();
 
@@ -2926,8 +2918,6 @@
   CHECK_EQ(prototype->GetDexCacheStrings(), method->GetDexCacheStrings());
   CHECK_EQ(prototype->GetDexCacheResolvedMethods(), method->GetDexCacheResolvedMethods());
   CHECK_EQ(prototype->GetDexCacheResolvedTypes(), method->GetDexCacheResolvedTypes());
-  CHECK_EQ(prototype->GetDexCacheInitializedStaticStorage(),
-           method->GetDexCacheInitializedStaticStorage());
   CHECK_EQ(prototype->GetDexMethodIndex(), method->GetDexMethodIndex());
 
   MethodHelper mh(method);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 34134fa..1744050 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -159,15 +159,12 @@
     EXPECT_TRUE(method->GetDexCacheStrings() != NULL);
     EXPECT_TRUE(method->GetDexCacheResolvedMethods() != NULL);
     EXPECT_TRUE(method->GetDexCacheResolvedTypes() != NULL);
-    EXPECT_TRUE(method->GetDexCacheInitializedStaticStorage() != NULL);
     EXPECT_EQ(method->GetDeclaringClass()->GetDexCache()->GetStrings(),
               method->GetDexCacheStrings());
     EXPECT_EQ(method->GetDeclaringClass()->GetDexCache()->GetResolvedMethods(),
               method->GetDexCacheResolvedMethods());
     EXPECT_EQ(method->GetDeclaringClass()->GetDexCache()->GetResolvedTypes(),
               method->GetDexCacheResolvedTypes());
-    EXPECT_EQ(method->GetDeclaringClass()->GetDexCache()->GetInitializedStaticStorage(),
-              method->GetDexCacheInitializedStaticStorage());
   }
 
   void AssertField(mirror::Class* klass, mirror::ArtField* field)
@@ -468,7 +465,6 @@
   ArtMethodOffsets() : CheckOffsets<mirror::ArtMethod>(false, "Ljava/lang/reflect/ArtMethod;") {
     // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, declaring_class_),                      "declaringClass"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_initialized_static_storage_), "dexCacheInitializedStaticStorage"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_resolved_methods_),           "dexCacheResolvedMethods"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_resolved_types_),             "dexCacheResolvedTypes"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_strings_),                    "dexCacheStrings"));
@@ -607,7 +603,6 @@
   DexCacheOffsets() : CheckOffsets<mirror::DexCache>(false, "Ljava/lang/DexCache;") {
     // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, dex_),                        "dex"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, initialized_static_storage_), "initializedStaticStorage"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, location_),                   "location"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, resolved_fields_),            "resolvedFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, resolved_methods_),           "resolvedMethods"));
@@ -1006,13 +1001,12 @@
   const DexFile::TypeId* type_id = dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
   ASSERT_TRUE(type_id != NULL);
   uint32_t type_idx = dex_file->GetIndexForTypeId(*type_id);
-  EXPECT_TRUE(clinit->GetDexCacheInitializedStaticStorage()->Get(type_idx) == NULL);
-  mirror::StaticStorageBase* uninit = ResolveVerifyAndClinit(type_idx, clinit, Thread::Current(), true, false);
+  mirror::Class* uninit = ResolveVerifyAndClinit(type_idx, clinit, Thread::Current(), true, false);
   EXPECT_TRUE(uninit != NULL);
-  EXPECT_TRUE(clinit->GetDexCacheInitializedStaticStorage()->Get(type_idx) == NULL);
-  mirror::StaticStorageBase* init = ResolveVerifyAndClinit(type_idx, getS0, Thread::Current(), true, false);
+  EXPECT_FALSE(uninit->IsInitialized());
+  mirror::Class* init = ResolveVerifyAndClinit(type_idx, getS0, Thread::Current(), true, false);
   EXPECT_TRUE(init != NULL);
-  EXPECT_EQ(init, clinit->GetDexCacheInitializedStaticStorage()->Get(type_idx));
+  EXPECT_TRUE(init->IsInitialized());
 }
 
 TEST_F(ClassLinkerTest, FinalizableBit) {
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 3ab8888..4e58a72 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -77,26 +77,33 @@
 mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer,
                                           int32_t component_count, Thread* self,
                                           bool access_check,
-                                          gc::AllocatorType allocator_type) {
+                                          gc::AllocatorType /* allocator_type */) {
   mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self,
                                                   access_check);
   if (UNLIKELY(klass == nullptr)) {
     return nullptr;
   }
-  return mirror::Array::Alloc<false>(self, klass, component_count, allocator_type);
+  // Always take the slow path for now; filled-new-array is not common.
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then
+  // the heap switched the allocator type while we were suspended.
+  return mirror::Array::Alloc<false>(self, klass, component_count, heap->GetCurrentAllocator());
 }
 
 // Helper function to allocate array for FILLED_NEW_ARRAY.
 mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* referrer,
                                                       int32_t component_count, Thread* self,
                                                       bool access_check,
-                                                      gc::AllocatorType allocator_type) {
+                                                      gc::AllocatorType /* allocator_type */) {
   mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self,
                                                   access_check);
   if (UNLIKELY(klass == nullptr)) {
     return nullptr;
   }
-  return mirror::Array::Alloc<true>(self, klass, component_count, allocator_type);
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then
+  // the heap switched the allocator type while we were suspended.
+  return mirror::Array::Alloc<true>(self, klass, component_count, heap->GetCurrentAllocator());
 }
 
 void ThrowStackOverflowError(Thread* self) {
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index a60446ca..8304229 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -46,11 +46,12 @@
 template <const bool kAccessCheck>
 ALWAYS_INLINE static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
                                                             mirror::ArtMethod* method,
-                                                            Thread* self)
+                                                            Thread* self, bool* slow_path)
     NO_THREAD_SAFETY_ANALYSIS {
   mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
   if (UNLIKELY(klass == NULL)) {
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
+    *slow_path = true;
     if (klass == NULL) {
       DCHECK(self->IsExceptionPending());
       return nullptr;  // Failure
@@ -61,11 +62,13 @@
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
                               PrettyDescriptor(klass).c_str());
+      *slow_path = true;
       return nullptr;  // Failure
     }
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
+      *slow_path = true;
       return nullptr;  // Failure
     }
   }
@@ -76,6 +79,11 @@
       DCHECK(self->IsExceptionPending());
       return nullptr;  // Failure
     }
+    // TODO: EnsureInitialized may cause us to suspend, meaning that another thread may try to
+    // change the allocator while we are stuck in the entrypoints of an old allocator. To handle
+    // this case, we mark the slow path boolean as true so that the caller knows to check the
+    // allocator type to see if it has changed.
+    *slow_path = true;
     return sirt_klass.get();
   }
   return klass;
@@ -92,9 +100,14 @@
                                                                 Thread* self,
                                                                 gc::AllocatorType allocator_type)
     NO_THREAD_SAFETY_ANALYSIS {
-  mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self);
-  if (UNLIKELY(klass == nullptr)) {
-    return nullptr;
+  bool slow_path = false;
+  mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self, &slow_path);
+  if (UNLIKELY(slow_path)) {
+    if (klass == nullptr) {
+      return nullptr;
+    }
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    return klass->Alloc<kInstrumented>(self, heap->GetCurrentAllocator());
   }
   return klass->Alloc<kInstrumented>(self, allocator_type);
 }
@@ -103,16 +116,19 @@
 template <bool kAccessCheck>
 ALWAYS_INLINE static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
                                                            mirror::ArtMethod* method,
-                                                           int32_t component_count)
+                                                           int32_t component_count,
+                                                           bool* slow_path)
     NO_THREAD_SAFETY_ANALYSIS {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
+    *slow_path = true;
     return nullptr;  // Failure
   }
   mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
   if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
-    if (klass == NULL) {  // Error
+    *slow_path = true;
+    if (klass == nullptr) {  // Error
       DCHECK(Thread::Current()->IsExceptionPending());
       return nullptr;  // Failure
     }
@@ -122,6 +138,7 @@
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
+      *slow_path = true;
       return nullptr;  // Failure
     }
   }
@@ -140,9 +157,16 @@
                                                               Thread* self,
                                                               gc::AllocatorType allocator_type)
     NO_THREAD_SAFETY_ANALYSIS {
-  mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count);
-  if (UNLIKELY(klass == nullptr)) {
-    return nullptr;
+  bool slow_path = false;
+  mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count,
+                                                       &slow_path);
+  if (UNLIKELY(slow_path)) {
+    if (klass == nullptr) {
+      return nullptr;
+    }
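+    // As above, the allocator may have changed while CheckArrayAlloc was in the slow path, so
+    // use the heap's current allocator rather than the cached allocator_type.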
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
+                                               heap->GetCurrentAllocator());
   }
   return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
 }
@@ -517,15 +541,15 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::Class* klass = class_linker->ResolveType(type_idx, referrer);
-  if (UNLIKELY(klass == NULL)) {
+  if (UNLIKELY(klass == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;  // Failure - Indicate to caller to deliver exception
+    return nullptr;  // Failure - Indicate to caller to deliver exception
   }
   // Perform access check if necessary.
   mirror::Class* referring_class = referrer->GetDeclaringClass();
   if (verify_access && UNLIKELY(!referring_class->CanAccess(klass))) {
     ThrowIllegalAccessErrorClass(referring_class, klass);
-    return NULL;  // Failure - Indicate to caller to deliver exception
+    return nullptr;  // Failure - Indicate to caller to deliver exception
   }
   // If we're just implementing const-class, we shouldn't call <clinit>.
   if (!can_run_clinit) {
@@ -541,9 +565,8 @@
   SirtRef<mirror::Class> sirt_class(self, klass);
   if (!class_linker->EnsureInitialized(sirt_class, true, true)) {
     CHECK(self->IsExceptionPending());
-    return NULL;  // Failure - Indicate to caller to deliver exception
+    return nullptr;  // Failure - Indicate to caller to deliver exception
   }
-  referrer->GetDexCacheInitializedStaticStorage()->Set(type_idx, sirt_class.get());
   return sirt_class.get();
 }
 
diff --git a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
index 0d57516..4c05e75 100644
--- a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
@@ -20,18 +20,21 @@
 
 namespace art {
 
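+// The portable entrypoints below are bound to a single allocator, so statically pick the one
+// that matches the compile-time kUseRosAlloc setting.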
+static constexpr gc::AllocatorType kPortableAllocatorType =
+    gc::kUseRosAlloc ? gc::kAllocatorTypeRosAlloc : gc::kAllocatorTypeDlMalloc;
+
 extern "C" mirror::Object* art_portable_alloc_object_from_code(uint32_t type_idx,
                                                                mirror::ArtMethod* referrer,
                                                                Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocObjectFromCode<false, true>(type_idx, referrer, thread, gc::kAllocatorTypeFreeList);
+  return AllocObjectFromCode<false, true>(type_idx, referrer, thread, kPortableAllocatorType);
 }
 
 extern "C" mirror::Object* art_portable_alloc_object_from_code_with_access_check(uint32_t type_idx,
                                                                                  mirror::ArtMethod* referrer,
                                                                                  Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocObjectFromCode<true, true>(type_idx, referrer, thread, gc::kAllocatorTypeFreeList);
+  return AllocObjectFromCode<true, true>(type_idx, referrer, thread, kPortableAllocatorType);
 }
 
 extern "C" mirror::Object* art_portable_alloc_array_from_code(uint32_t type_idx,
@@ -40,7 +43,7 @@
                                                               Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return AllocArrayFromCode<false, true>(type_idx, referrer, length, self,
-                                         gc::kAllocatorTypeFreeList);
+                                         kPortableAllocatorType);
 }
 
 extern "C" mirror::Object* art_portable_alloc_array_from_code_with_access_check(uint32_t type_idx,
@@ -49,7 +52,7 @@
                                                                                 Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return AllocArrayFromCode<true, true>(type_idx, referrer, length, self,
-                                        gc::kAllocatorTypeFreeList);
+                                        kPortableAllocatorType);
 }
 
 extern "C" mirror::Object* art_portable_check_and_alloc_array_from_code(uint32_t type_idx,
@@ -58,7 +61,7 @@
                                                                         Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return CheckAndAllocArrayFromCodeInstrumented(type_idx, referrer, length, thread, false,
-                                                gc::kAllocatorTypeFreeList);
+                                                kPortableAllocatorType);
 }
 
 extern "C" mirror::Object* art_portable_check_and_alloc_array_from_code_with_access_check(uint32_t type_idx,
@@ -67,7 +70,7 @@
                                                                                           Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return CheckAndAllocArrayFromCodeInstrumented(type_idx, referrer, length, thread, true,
-                                                gc::kAllocatorTypeFreeList);
+                                                kPortableAllocatorType);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 1ae39ab..b1dca77 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -79,7 +79,8 @@
     GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, Instrumented, true, allocator_type) \
     GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, , false, allocator_type)
 
-GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(, gc::kAllocatorTypeFreeList)
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(DlMalloc, gc::kAllocatorTypeDlMalloc)
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(RosAlloc, gc::kAllocatorTypeRosAlloc)
 GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(BumpPointer, gc::kAllocatorTypeBumpPointer)
 GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(TLAB, gc::kAllocatorTypeTLAB)
 
diff --git a/runtime/gc/accounting/heap_bitmap.cc b/runtime/gc/accounting/heap_bitmap.cc
index 5589461..6625b7b 100644
--- a/runtime/gc/accounting/heap_bitmap.cc
+++ b/runtime/gc/accounting/heap_bitmap.cc
@@ -55,11 +55,23 @@
   continuous_space_bitmaps_.push_back(bitmap);
 }
 
+void HeapBitmap::RemoveContinuousSpaceBitmap(accounting::SpaceBitmap* bitmap) {
+  auto it = std::find(continuous_space_bitmaps_.begin(), continuous_space_bitmaps_.end(), bitmap);
+  DCHECK(it != continuous_space_bitmaps_.end());
+  continuous_space_bitmaps_.erase(it);
+}
+
 void HeapBitmap::AddDiscontinuousObjectSet(SpaceSetMap* set) {
   DCHECK(set != NULL);
   discontinuous_space_sets_.push_back(set);
 }
 
+void HeapBitmap::RemoveDiscontinuousObjectSet(SpaceSetMap* set) {
+  auto it = std::find(discontinuous_space_sets_.begin(), discontinuous_space_sets_.end(), set);
+  DCHECK(it != discontinuous_space_sets_.end());
+  discontinuous_space_sets_.erase(it);
+}
+
 void HeapBitmap::Walk(SpaceBitmap::Callback* callback, void* arg) {
   for (const auto& bitmap : continuous_space_bitmaps_) {
     bitmap->Walk(callback, arg);
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 24ebbaa..bed2c1e 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -105,7 +105,9 @@
   const Heap* const heap_;
 
   void AddContinuousSpaceBitmap(SpaceBitmap* bitmap);
+  void RemoveContinuousSpaceBitmap(SpaceBitmap* bitmap);
   void AddDiscontinuousObjectSet(SpaceSetMap* set);
+  void RemoveDiscontinuousObjectSet(SpaceSetMap* set);
 
   // Bitmaps covering continuous spaces.
   SpaceBitmapVector continuous_space_bitmaps_;
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index b428e74..6d9dde7 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -82,6 +82,8 @@
     if (ref != nullptr) {
       Object* new_ref = visitor_(ref, arg_);
       if (new_ref != ref) {
+        // Use SetFieldPtr to avoid card mark as an optimization which reduces dirtied pages and
+        // improves performance.
         obj->SetFieldPtr(offset, new_ref, true);
       }
     }
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 6baee54..4822e64 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -65,6 +65,7 @@
 
 void GarbageCollector::Run(bool clear_soft_references) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   pause_times_.clear();
   duration_ns_ = 0;
@@ -82,14 +83,23 @@
     // Pause is the entire length of the GC.
     uint64_t pause_start = NanoTime();
     ATRACE_BEGIN("Application threads suspended");
-    thread_list->SuspendAll();
-    GetHeap()->RevokeAllThreadLocalBuffers();
-    MarkingPhase();
-    ReclaimPhase();
-    thread_list->ResumeAll();
+    // Mutator lock may be already exclusively held when we do garbage collections for changing the
+    // current collector / allocator during process state updates.
+    if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
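+      // The mutators are already suspended by the caller, so run the pause phases directly.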
+      GetHeap()->RevokeAllThreadLocalBuffers();
+      MarkingPhase();
+      ReclaimPhase();
+    } else {
+      thread_list->SuspendAll();
+      GetHeap()->RevokeAllThreadLocalBuffers();
+      MarkingPhase();
+      ReclaimPhase();
+      thread_list->ResumeAll();
+    }
     ATRACE_END();
     RegisterPause(NanoTime() - pause_start);
   } else {
+    CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
-    Thread* self = Thread::Current();
     {
       ReaderMutexLock mu(self, *Locks::mutator_lock_);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index cae2a54..937ff6d 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -333,12 +333,6 @@
     }
   }
 
-  // Before freeing anything, lets verify the heap.
-  if (kIsDebugBuild) {
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    VerifyImageRoots();
-  }
-
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
@@ -595,23 +589,6 @@
   timings_.EndSplit();
 }
 
-void MarkSweep::CheckObject(const Object* obj) {
-  DCHECK(obj != NULL);
-  VisitObjectReferences(const_cast<Object*>(obj), [this](const Object* obj, const Object* ref,
-      MemberOffset offset, bool is_static) NO_THREAD_SAFETY_ANALYSIS {
-    Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
-    CheckReference(obj, ref, offset, is_static);
-  }, true);
-}
-
-void MarkSweep::VerifyImageRootVisitor(Object* root, void* arg) {
-  DCHECK(root != NULL);
-  DCHECK(arg != NULL);
-  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
-  DCHECK(mark_sweep->heap_->GetMarkBitmap()->Test(root));
-  mark_sweep->CheckObject(root);
-}
-
 void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
   CHECK(space->IsMallocSpace());
   space::MallocSpace* alloc_space = space->AsMallocSpace();
@@ -884,30 +861,6 @@
   }
 }
 
-void MarkSweep::VerifyImageRoots() {
-  // Verify roots ensures that all the references inside the image space point
-  // objects which are either in the image space or marked objects in the alloc
-  // space
-  timings_.StartSplit("VerifyImageRoots");
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsImageSpace()) {
-      space::ImageSpace* image_space = space->AsImageSpace();
-      uintptr_t begin = reinterpret_cast<uintptr_t>(image_space->Begin());
-      uintptr_t end = reinterpret_cast<uintptr_t>(image_space->End());
-      accounting::SpaceBitmap* live_bitmap = image_space->GetLiveBitmap();
-      DCHECK(live_bitmap != NULL);
-      live_bitmap->VisitMarkedRange(begin, end, [this](const Object* obj) {
-        if (kCheckLocks) {
-          Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
-        }
-        DCHECK(obj != NULL);
-        CheckObject(obj);
-      });
-    }
-  }
-  timings_.EndSplit();
-}
-
 class RecursiveMarkTask : public MarkStackTask<false> {
  public:
   RecursiveMarkTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
@@ -1050,12 +1003,6 @@
   Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
 }
 
-struct SweepCallbackContext {
-  MarkSweep* mark_sweep;
-  space::AllocSpace* space;
-  Thread* self;
-};
-
 class CheckpointMarkThreadRoots : public Closure {
  public:
   explicit CheckpointMarkThreadRoots(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {}
@@ -1095,97 +1042,89 @@
   timings_.EndSplit();
 }
 
-void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  MarkSweep* mark_sweep = context->mark_sweep;
-  Heap* heap = mark_sweep->GetHeap();
-  space::AllocSpace* space = context->space;
-  Thread* self = context->self;
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
-  // Use a bulk free, that merges consecutive objects before freeing or free per object?
-  // Documentation suggests better free performance with merging, but this may be at the expensive
-  // of allocation.
-  size_t freed_objects = num_ptrs;
-  // AllocSpace::FreeList clears the value in ptrs, so perform after clearing the live bit
-  size_t freed_bytes = space->FreeList(self, num_ptrs, ptrs);
-  heap->RecordFree(freed_objects, freed_bytes);
-  mark_sweep->freed_objects_.FetchAndAdd(freed_objects);
-  mark_sweep->freed_bytes_.FetchAndAdd(freed_bytes);
-}
-
-void MarkSweep::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
-  Heap* heap = context->mark_sweep->GetHeap();
-  // We don't free any actual memory to avoid dirtying the shared zygote pages.
-  for (size_t i = 0; i < num_ptrs; ++i) {
-    Object* obj = static_cast<Object*>(ptrs[i]);
-    heap->GetLiveBitmap()->Clear(obj);
-    heap->GetCardTable()->MarkCard(obj);
-  }
-}
-
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  space::MallocSpace* space = heap_->GetNonMovingSpace();
   timings_.StartSplit("SweepArray");
-  // Newly allocated objects MUST be in the alloc space and those are the only objects which we are
-  // going to free.
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
-  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
-  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
-  if (swap_bitmaps) {
-    std::swap(live_bitmap, mark_bitmap);
-    std::swap(large_live_objects, large_mark_objects);
-  }
-
+  Thread* self = Thread::Current();
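+  // Buffer of dead objects that is flushed to the space's FreeList in bulk once it fills up.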
+  mirror::Object* chunk_free_buffer[kSweepArrayChunkFreeSize];
+  size_t chunk_free_pos = 0;
   size_t freed_bytes = 0;
   size_t freed_large_object_bytes = 0;
   size_t freed_objects = 0;
   size_t freed_large_objects = 0;
-  size_t count = allocations->Size();
+  // How many objects are left in the array, modified after each space is swept.
   Object** objects = const_cast<Object**>(allocations->Begin());
-  Object** out = objects;
-  Object** objects_to_chunk_free = out;
-
-  // Empty the allocation stack.
-  Thread* self = Thread::Current();
+  size_t count = allocations->Size();
+  // Change the order to ensure that the non-moving space is swept last as an optimization.
+  std::vector<space::ContinuousSpace*> sweep_spaces;
+  space::ContinuousSpace* non_moving_space = nullptr;
+  for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
+    if (space->IsAllocSpace() && !IsImmuneSpace(space) && space->GetLiveBitmap() != nullptr) {
+      if (space == heap_->GetNonMovingSpace()) {
+        non_moving_space = space;
+      } else {
+        sweep_spaces.push_back(space);
+      }
+    }
+  }
+  // Unlikely to sweep a significant amount of non-movable objects, so we do these after the
+  // other alloc spaces as an optimization.
+  if (non_moving_space != nullptr) {
+    sweep_spaces.push_back(non_moving_space);
+  }
+  // Start by sweeping the continuous spaces.
+  for (space::ContinuousSpace* space : sweep_spaces) {
+    space::AllocSpace* alloc_space = space->AsAllocSpace();
+    accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    if (swap_bitmaps) {
+      std::swap(live_bitmap, mark_bitmap);
+    }
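+    // Compact the allocation stack in place: objects in this space are removed (and freed if
+    // unmarked), objects outside of it are kept for the remaining spaces.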
+    Object** out = objects;
+    for (size_t i = 0; i < count; ++i) {
+      Object* obj = objects[i];
+      if (space->HasAddress(obj)) {
+        // This object is in the space, remove it from the array and add it to the sweep buffer
+        // if needed.
+        if (!mark_bitmap->Test(obj)) {
+          if (chunk_free_pos >= kSweepArrayChunkFreeSize) {
+            timings_.StartSplit("FreeList");
+            freed_objects += chunk_free_pos;
+            freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+            timings_.EndSplit();
+            chunk_free_pos = 0;
+          }
+          chunk_free_buffer[chunk_free_pos++] = obj;
+        }
+      } else {
+        *(out++) = obj;
+      }
+    }
+    if (chunk_free_pos > 0) {
+      timings_.StartSplit("FreeList");
+      freed_objects += chunk_free_pos;
+      freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
+      timings_.EndSplit();
+      chunk_free_pos = 0;
+    }
+    // All of the references which the space contained are no longer in the allocation stack;
+    // update the count.
+    count = out - objects;
+  }
+  // Handle the large object space.
+  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
+  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
+  if (swap_bitmaps) {
+    std::swap(large_live_objects, large_mark_objects);
+  }
   for (size_t i = 0; i < count; ++i) {
     Object* obj = objects[i];
-    // There should only be objects in the AllocSpace/LargeObjectSpace in the allocation stack.
-    if (LIKELY(mark_bitmap->HasAddress(obj))) {
-      if (!mark_bitmap->Test(obj)) {
-        // Don't bother un-marking since we clear the mark bitmap anyways.
-        *(out++) = obj;
-        // Free objects in chunks.
-        DCHECK_GE(out, objects_to_chunk_free);
-        DCHECK_LE(static_cast<size_t>(out - objects_to_chunk_free), kSweepArrayChunkFreeSize);
-        if (static_cast<size_t>(out - objects_to_chunk_free) == kSweepArrayChunkFreeSize) {
-          timings_.StartSplit("FreeList");
-          size_t chunk_freed_objects = out - objects_to_chunk_free;
-          freed_objects += chunk_freed_objects;
-          freed_bytes += space->FreeList(self, chunk_freed_objects, objects_to_chunk_free);
-          objects_to_chunk_free = out;
-          timings_.EndSplit();
-        }
-      }
-    } else if (!large_mark_objects->Test(obj)) {
+    // Handle large objects.
+    if (!large_mark_objects->Test(obj)) {
       ++freed_large_objects;
       freed_large_object_bytes += large_object_space->Free(self, obj);
     }
   }
-  // Free the remaining objects in chunks.
-  DCHECK_GE(out, objects_to_chunk_free);
-  DCHECK_LE(static_cast<size_t>(out - objects_to_chunk_free), kSweepArrayChunkFreeSize);
-  if (out - objects_to_chunk_free > 0) {
-    timings_.StartSplit("FreeList");
-    size_t chunk_freed_objects = out - objects_to_chunk_free;
-    freed_objects += chunk_freed_objects;
-    freed_bytes += space->FreeList(self, chunk_freed_objects, objects_to_chunk_free);
-    timings_.EndSplit();
-  }
-  CHECK_EQ(count, allocations->Size());
   timings_.EndSplit();
 
   timings_.StartSplit("RecordFree");
@@ -1206,45 +1145,19 @@
 void MarkSweep::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
-
-  const bool partial = (GetGcType() == kGcTypePartial);
-  SweepCallbackContext scc;
-  scc.mark_sweep = this;
-  scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (!space->IsMallocSpace()) {
-      continue;
-    }
-    // We always sweep always collect spaces.
-    bool sweep_space = space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect;
-    if (!partial && !sweep_space) {
-      // We sweep full collect spaces when the GC isn't a partial GC (ie its full).
-      sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
-    }
-    if (sweep_space) {
-      uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
-      uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsMallocSpace();
-      accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-      if (swap_bitmaps) {
-        std::swap(live_bitmap, mark_bitmap);
-      }
-      if (!space->IsZygoteSpace()) {
-        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
-        // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
-                                           &SweepCallback, reinterpret_cast<void*>(&scc));
-      } else {
-        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
-        // Zygote sweep takes care of dirtying cards and clearing live bits, does not free actual
-        // memory.
-        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
-                                           &ZygoteSweepCallback, reinterpret_cast<void*>(&scc));
-      }
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* malloc_space = space->AsMallocSpace();
+      TimingLogger::ScopedSplit split(
+          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+      size_t freed_objects = 0;
+      size_t freed_bytes = 0;
+      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      heap_->RecordFree(freed_objects, freed_bytes);
+      freed_objects_.FetchAndAdd(freed_objects);
+      freed_bytes_.FetchAndAdd(freed_bytes);
     }
   }
-
   SweepLargeObjects(swap_bitmaps);
 }
 
@@ -1272,48 +1185,6 @@
   GetHeap()->RecordFree(freed_objects, freed_bytes);
 }
 
-void MarkSweep::CheckReference(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) {
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace() && space->Contains(ref)) {
-      DCHECK(IsMarked(obj));
-
-      bool is_marked = IsMarked(ref);
-      if (!is_marked) {
-        LOG(INFO) << *space;
-        LOG(WARNING) << (is_static ? "Static ref'" : "Instance ref'") << PrettyTypeOf(ref)
-                     << "' (" << reinterpret_cast<const void*>(ref) << ") in '" << PrettyTypeOf(obj)
-                     << "' (" << reinterpret_cast<const void*>(obj) << ") at offset "
-                     << reinterpret_cast<void*>(offset.Int32Value()) << " wasn't marked";
-
-        const Class* klass = is_static ? obj->AsClass() : obj->GetClass();
-        DCHECK(klass != NULL);
-        const ObjectArray<ArtField>* fields = is_static ? klass->GetSFields() : klass->GetIFields();
-        DCHECK(fields != NULL);
-        bool found = false;
-        for (int32_t i = 0; i < fields->GetLength(); ++i) {
-          const ArtField* cur = fields->Get(i);
-          if (cur->GetOffset().Int32Value() == offset.Int32Value()) {
-            LOG(WARNING) << "Field referencing the alloc space was " << PrettyField(cur);
-            found = true;
-            break;
-          }
-        }
-        if (!found) {
-          LOG(WARNING) << "Could not find field in object alloc space with offset " << offset.Int32Value();
-        }
-
-        bool obj_marked = heap_->GetCardTable()->IsDirty(obj);
-        if (!obj_marked) {
-          LOG(WARNING) << "Object '" << PrettyTypeOf(obj) << "' "
-                       << "(" << reinterpret_cast<const void*>(obj) << ") contains references to "
-                       << "the alloc space, but wasn't card marked";
-        }
-      }
-    }
-    break;
-  }
-}
-
 // Process the "referent" field in a java.lang.ref.Reference.  If the
 // referent has not yet been marked, put it on the appropriate list in
 // the heap for later processing.
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 62991bb..e2eafb5 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -100,11 +100,6 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Verify that image roots point to only marked objects within the alloc space.
-  void VerifyImageRoots()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Builds a mark stack and recursively mark until it empties.
   void RecursiveMark()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -251,20 +246,6 @@
   // Returns true if we need to add obj to a mark stack.
   bool MarkObjectParallel(const mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
-  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  // Special sweep for zygote that just marks objects / dirties cards.
-  static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  void CheckReference(const mirror::Object* obj, const mirror::Object* ref, MemberOffset offset,
-                      bool is_static)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  void CheckObject(const mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   // Verify the roots of the heap and print out information related to any invalid roots.
   // Called in MarkObject, so may we may not hold the mutator lock.
   void VerifyRoots()
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index a4f7121..0150609 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -99,9 +99,13 @@
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
-        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      ImmuneSpace(space);
+    if (space->GetLiveBitmap() != nullptr) {
+      if (space == to_space_) {
+        BindLiveToMarkBitmap(to_space_);
+      } else if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
+          || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+        ImmuneSpace(space);
+      }
     }
   }
   timings_.EndSplit();
@@ -115,11 +119,6 @@
       immune_end_(nullptr),
       to_space_(nullptr),
       from_space_(nullptr),
-      soft_reference_list_(nullptr),
-      weak_reference_list_(nullptr),
-      finalizer_reference_list_(nullptr),
-      phantom_reference_list_(nullptr),
-      cleared_reference_list_(nullptr),
       self_(nullptr),
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0) {
@@ -132,15 +131,12 @@
   DCHECK(mark_stack_ != nullptr);
   immune_begin_ = nullptr;
   immune_end_ = nullptr;
-  soft_reference_list_ = nullptr;
-  weak_reference_list_ = nullptr;
-  finalizer_reference_list_ = nullptr;
-  phantom_reference_list_ = nullptr;
-  cleared_reference_list_ = nullptr;
   self_ = Thread::Current();
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
   heap_->PreGcVerification(this);
+  // Set the initial bitmap.
+  to_space_live_bitmap_ = to_space_->GetLiveBitmap();
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
@@ -229,17 +225,18 @@
     SweepSystemWeaks();
   }
   // Record freed memory.
-  int from_bytes = from_space_->GetBytesAllocated();
-  int to_bytes = to_space_->GetBytesAllocated();
-  int from_objects = from_space_->GetObjectsAllocated();
-  int to_objects = to_space_->GetObjectsAllocated();
-  int freed_bytes = from_bytes - to_bytes;
-  int freed_objects = from_objects - to_objects;
-  CHECK_GE(freed_bytes, 0);
+  uint64_t from_bytes = from_space_->GetBytesAllocated();
+  uint64_t to_bytes = to_space_->GetBytesAllocated();
+  uint64_t from_objects = from_space_->GetObjectsAllocated();
+  uint64_t to_objects = to_space_->GetObjectsAllocated();
+  CHECK_LE(to_objects, from_objects);
+  int64_t freed_bytes = from_bytes - to_bytes;
+  int64_t freed_objects = from_objects - to_objects;
   freed_bytes_.FetchAndAdd(freed_bytes);
   freed_objects_.FetchAndAdd(freed_objects);
-  heap_->RecordFree(static_cast<size_t>(freed_objects), static_cast<size_t>(freed_bytes));
-
+  // Note: Freed bytes can be negative if we copy from a compacted space to a free-list backed
+  // space.
+  heap_->RecordFree(freed_objects, freed_bytes);
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
@@ -356,6 +353,9 @@
         // Make sure to only update the forwarding address AFTER you copy the object so that the
         // monitor word doesn't get stomped over.
         obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)));
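+        // Mark the copied object as live in the to-space bitmap (when the to-space has one) so
+        // that later bitmap walks and sweeps see the forwarded copy.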
+        if (to_space_live_bitmap_ != nullptr) {
+          to_space_live_bitmap_->Set(forward_address);
+        }
         MarkStackPush(forward_address);
       } else {
         DCHECK(to_space_->HasAddress(forward_address) ||
@@ -465,45 +465,19 @@
 void SemiSpace::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
-
-  const bool partial = (GetGcType() == kGcTypePartial);
-  SweepCallbackContext scc;
-  scc.mark_sweep = this;
-  scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (!space->IsMallocSpace()) {
-      continue;
-    }
-    // We always sweep always collect spaces.
-    bool sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect);
-    if (!partial && !sweep_space) {
-      // We sweep full collect spaces when the GC isn't a partial GC (ie its full).
-      sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
-    }
-    if (sweep_space && space->IsMallocSpace()) {
-      uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
-      uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsMallocSpace();
-      accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-      if (swap_bitmaps) {
-        std::swap(live_bitmap, mark_bitmap);
-      }
-      if (!space->IsZygoteSpace()) {
-        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
-        // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
-                                           &SweepCallback, reinterpret_cast<void*>(&scc));
-      } else {
-        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
-        // Zygote sweep takes care of dirtying cards and clearing live bits, does not free actual
-        // memory.
-        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
-                                           &ZygoteSweepCallback, reinterpret_cast<void*>(&scc));
-      }
+    if (space->IsMallocSpace() && space != from_space_ && space != to_space_) {
+      space::MallocSpace* malloc_space = space->AsMallocSpace();
+      TimingLogger::ScopedSplit split(
+          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+      size_t freed_objects = 0;
+      size_t freed_bytes = 0;
+      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      heap_->RecordFree(freed_objects, freed_bytes);
+      freed_objects_.FetchAndAdd(freed_objects);
+      freed_bytes_.FetchAndAdd(freed_bytes);
     }
   }
-
   SweepLargeObjects(swap_bitmaps);
 }
 
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b0724f9..b76ef5f 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -131,10 +131,6 @@
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  mirror::Object* GetClearedReferences() {
-    return cleared_reference_list_;
-  }
-
   // TODO: enable thread safety analysis when in use by multiple worker threads.
   template <typename MarkVisitor>
   void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
@@ -269,16 +265,12 @@
   mirror::Object* immune_begin_;
   mirror::Object* immune_end_;
 
-  // Destination and source spaces.
+  // Destination and source spaces (can be any type of ContinuousMemMapAllocSpace which either has
+  // a live bitmap or doesn't).
   space::ContinuousMemMapAllocSpace* to_space_;
+  accounting::SpaceBitmap* to_space_live_bitmap_;  // Cached live bitmap as an optimization.
   space::ContinuousMemMapAllocSpace* from_space_;
 
-  mirror::Object* soft_reference_list_;
-  mirror::Object* weak_reference_list_;
-  mirror::Object* finalizer_reference_list_;
-  mirror::Object* phantom_reference_list_;
-  mirror::Object* cleared_reference_list_;
-
   Thread* self_;
 
   // Used for kEnableSimplePromo. The end/top of the bump pointer
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index ee6077a..c562e8c 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -56,8 +56,7 @@
 }
 
 void StickyMarkSweep::Sweep(bool swap_bitmaps) {
-  accounting::ObjectStack* live_stack = GetHeap()->GetLiveStack();
-  SweepArray(live_stack, false);
+  SweepArray(GetHeap()->GetLiveStack(), false);
 }
 
 void StickyMarkSweep::MarkThreadRoots(Thread* self) {
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index af1b26b..5e1136b 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -52,8 +52,14 @@
   size_t bytes_allocated;
   obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated);
   if (UNLIKELY(obj == nullptr)) {
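+    // Remember whether this request used the heap's current allocator so that we can detect an
+    // allocator change that happened while this thread was suspended for GC.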
+    bool is_current_allocator = allocator == GetCurrentAllocator();
     obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &klass);
     if (obj == nullptr) {
+      bool after_is_current_allocator = allocator == GetCurrentAllocator();
+      if (is_current_allocator && !after_is_current_allocator) {
+        // If the allocator changed, we need to restart the allocation.
+        return AllocObject<kInstrumented>(self, klass, byte_count);
+      }
       return nullptr;
     }
   }
@@ -120,14 +126,6 @@
   if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
   }
-  if (kInstrumented) {
-    if (UNLIKELY(running_on_valgrind_ && allocator_type == kAllocatorTypeFreeList)) {
-      return non_moving_space_->Alloc(self, alloc_size, bytes_allocated);
-    }
-  } else {
-    // If running on valgrind, we should be using the instrumented path.
-    DCHECK(!running_on_valgrind_);
-  }
   mirror::Object* ret;
   switch (allocator_type) {
     case kAllocatorTypeBumpPointer: {
@@ -139,16 +137,30 @@
       }
       break;
     }
-    case kAllocatorTypeFreeList: {
-      if (kUseRosAlloc) {
-        ret = reinterpret_cast<space::RosAllocSpace*>(non_moving_space_)->AllocNonvirtual(
-            self, alloc_size, bytes_allocated);
+    case kAllocatorTypeRosAlloc: {
+      if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
+        // Running on valgrind, so use the virtual Alloc which includes the valgrind
+        // instrumentation.
+        ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated);
       } else {
-        ret = reinterpret_cast<space::DlMallocSpace*>(non_moving_space_)->AllocNonvirtual(
-            self, alloc_size, bytes_allocated);
+        DCHECK(!running_on_valgrind_);
+        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated);
       }
       break;
     }
+    case kAllocatorTypeDlMalloc: {
+      if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
+        // Running on valgrind, so use the virtual Alloc which includes the valgrind
+        // instrumentation.
+        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated);
+      } else {
+        DCHECK(!running_on_valgrind_);
+        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated);
+      }
+      break;
+    }
+    case kAllocatorTypeNonMoving: {
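+      // The concrete type of the non-moving space varies, so go through the virtual Alloc.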
+      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated);
+      break;
+    }
     case kAllocatorTypeLOS: {
       ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated);
       // Note that the bump pointer spaces aren't necessarily next to
@@ -159,15 +171,15 @@
     }
     case kAllocatorTypeTLAB: {
       alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
-      if (UNLIKELY(self->TLABSize() < alloc_size)) {
+      if (UNLIKELY(self->TlabSize() < alloc_size)) {
        // Try allocating a new thread local buffer; if the allocation fails the space must be
         // full so return nullptr.
-        if (!bump_pointer_space_->AllocNewTLAB(self, alloc_size + kDefaultTLABSize)) {
+        if (!bump_pointer_space_->AllocNewTlab(self, alloc_size + kDefaultTLABSize)) {
           return nullptr;
         }
       }
       // The allocation can't fail.
-      ret = self->AllocTLAB(alloc_size);
+      ret = self->AllocTlab(alloc_size);
       DCHECK(ret != nullptr);
       *bytes_allocated = alloc_size;
       break;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e08106b..6e2bf91 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -75,13 +75,17 @@
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& image_file_name,
-           CollectorType post_zygote_collector_type, size_t parallel_gc_threads,
-           size_t conc_gc_threads, bool low_memory_mode, size_t long_pause_log_threshold,
-           size_t long_gc_log_threshold, bool ignore_max_footprint, bool use_tlab)
+           CollectorType post_zygote_collector_type, CollectorType background_collector_type,
+           size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
+           size_t long_pause_log_threshold, size_t long_gc_log_threshold,
+           bool ignore_max_footprint, bool use_tlab)
     : non_moving_space_(nullptr),
+      rosalloc_space_(nullptr),
+      dlmalloc_space_(nullptr),
       concurrent_gc_(false),
       collector_type_(kCollectorTypeNone),
       post_zygote_collector_type_(post_zygote_collector_type),
+      background_collector_type_(background_collector_type),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
       low_memory_mode_(low_memory_mode),
@@ -116,7 +120,7 @@
       verify_pre_gc_heap_(false),
       verify_post_gc_heap_(false),
       verify_mod_union_table_(false),
-      min_alloc_space_size_for_sticky_gc_(2 * MB),
+      min_alloc_space_size_for_sticky_gc_(1112 * MB),
       min_remaining_space_for_sticky_gc_(1 * MB),
       last_trim_time_ms_(0),
       allocation_rate_(0),
@@ -127,8 +131,8 @@
        */
       max_allocation_stack_size_(kGCALotMode ? kGcAlotInterval
           : (kDesiredHeapVerification > kVerifyAllFast) ? KB : MB),
-      current_allocator_(kMovingCollector ? kAllocatorTypeBumpPointer : kAllocatorTypeFreeList),
-      current_non_moving_allocator_(kAllocatorTypeFreeList),
+      current_allocator_(kAllocatorTypeDlMalloc),
+      current_non_moving_allocator_(kAllocatorTypeNonMoving),
       bump_pointer_space_(nullptr),
       temp_space_(nullptr),
       reference_referent_offset_(0),
@@ -150,7 +154,7 @@
   }
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
-  if (!Runtime::Current()->IsZygote()) {
+  if (!Runtime::Current()->IsZygote() || !kMovingCollector) {
     ChangeCollector(post_zygote_collector_type_);
   } else {
     // We are the zygote, use bump pointer allocation + semi space collector.
@@ -173,20 +177,23 @@
       requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
     }
   }
-
   const char* name = Runtime::Current()->IsZygote() ? "zygote space" : "alloc space";
-  if (!kUseRosAlloc) {
-    non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
-                                                     requested_alloc_space_begin);
+  space::MallocSpace* malloc_space;
+  if (kUseRosAlloc) {
+    malloc_space = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                requested_alloc_space_begin, low_memory_mode_);
+    CHECK(malloc_space != nullptr) << "Failed to create rosalloc space";
   } else {
-    non_moving_space_ = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
-                                                     requested_alloc_space_begin, low_memory_mode_);
+    malloc_space = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                requested_alloc_space_begin);
+    CHECK(malloc_space != nullptr) << "Failed to create dlmalloc space";
   }
+
   if (kMovingCollector) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
     // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
     // issues.
-    const size_t bump_pointer_space_size = std::min(non_moving_space_->Capacity(), 128 * MB);
+    const size_t bump_pointer_space_size = std::min(malloc_space->Capacity(), 128 * MB);
     bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
                                                           bump_pointer_space_size, nullptr);
     CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
@@ -196,19 +203,18 @@
     CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space";
     AddSpace(temp_space_);
   }
-
-  CHECK(non_moving_space_ != NULL) << "Failed to create non-moving space";
-  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
-  AddSpace(non_moving_space_);
+  non_moving_space_ = malloc_space;
+  malloc_space->SetFootprintLimit(malloc_space->Capacity());
+  AddSpace(malloc_space);
 
   // Allocate the large object space.
-  const bool kUseFreeListSpaceForLOS = false;
+  constexpr bool kUseFreeListSpaceForLOS = false;
   if (kUseFreeListSpaceForLOS) {
-    large_object_space_ = space::FreeListSpace::Create("large object space", NULL, capacity);
+    large_object_space_ = space::FreeListSpace::Create("large object space", nullptr, capacity);
   } else {
     large_object_space_ = space::LargeObjectMapSpace::Create("large object space");
   }
-  CHECK(large_object_space_ != NULL) << "Failed to create large object space";
+  CHECK(large_object_space_ != nullptr) << "Failed to create large object space";
   AddSpace(large_object_space_);
 
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
@@ -278,7 +284,9 @@
 }
 
 void Heap::ChangeAllocator(AllocatorType allocator) {
+  // These two allocators are only used internally and don't have any entrypoints.
   DCHECK_NE(allocator, kAllocatorTypeLOS);
+  DCHECK_NE(allocator, kAllocatorTypeNonMoving);
   if (current_allocator_ != allocator) {
     current_allocator_ = allocator;
     SetQuickAllocEntryPointsAllocator(current_allocator_);
@@ -322,7 +330,16 @@
 }
 
 void Heap::UpdateProcessState(ProcessState process_state) {
-  process_state_ = process_state;
+  if (process_state_ != process_state) {
+    process_state_ = process_state;
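+    // Use the foreground (post zygote) collector when the app is jank perceptible and the
+    // background collector otherwise.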
+    if (process_state_ == kProcessStateJankPerceptible) {
+      TransitionCollector(post_zygote_collector_type_);
+    } else {
+      TransitionCollector(background_collector_type_);
+    }
+  } else {
+    CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
+  }
 }
 
 void Heap::CreateThreadPool() {
@@ -351,15 +368,28 @@
 }
 
 void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) {
-  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(), stack);
+  space::ContinuousSpace* space1 = rosalloc_space_ != nullptr ? rosalloc_space_ : non_moving_space_;
+  space::ContinuousSpace* space2 = dlmalloc_space_ != nullptr ? dlmalloc_space_ : non_moving_space_;
+  // This handles the case where either the rosalloc space or the dlmalloc space is missing.
+  // TODO: Generalize this to n bitmaps?
+  if (space1 == nullptr) {
+    DCHECK(space2 != nullptr);
+    space1 = space2;
+  }
+  if (space2 == nullptr) {
+    DCHECK(space1 != nullptr);
+    space2 = space1;
+  }
+  MarkAllocStack(space1->GetLiveBitmap(), space2->GetLiveBitmap(),
+                 large_object_space_->GetLiveObjects(), stack);
 }
 
 void Heap::DeleteThreadPool() {
   thread_pool_.reset(nullptr);
 }
 
-void Heap::AddSpace(space::Space* space) {
-  DCHECK(space != NULL);
+void Heap::AddSpace(space::Space* space, bool set_as_default) {
+  DCHECK(space != nullptr);
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   if (space->IsContinuousSpace()) {
     DCHECK(!space->IsDiscontinuousSpace());
@@ -372,31 +402,19 @@
       live_bitmap_->AddContinuousSpaceBitmap(live_bitmap);
       mark_bitmap_->AddContinuousSpaceBitmap(mark_bitmap);
     }
-
     continuous_spaces_.push_back(continuous_space);
-    if (continuous_space->IsMallocSpace()) {
-      non_moving_space_ = continuous_space->AsMallocSpace();
+    if (set_as_default) {
+      if (continuous_space->IsDlMallocSpace()) {
+        dlmalloc_space_ = continuous_space->AsDlMallocSpace();
+      } else if (continuous_space->IsRosAllocSpace()) {
+        rosalloc_space_ = continuous_space->AsRosAllocSpace();
+      }
     }
-
     // Ensure that spaces remain sorted in increasing order of start address.
     std::sort(continuous_spaces_.begin(), continuous_spaces_.end(),
               [](const space::ContinuousSpace* a, const space::ContinuousSpace* b) {
       return a->Begin() < b->Begin();
     });
-    // Ensure that ImageSpaces < ZygoteSpaces < AllocSpaces so that we can do address based checks to
-    // avoid redundant marking.
-    bool seen_zygote = false, seen_alloc = false;
-    for (const auto& space : continuous_spaces_) {
-      if (space->IsImageSpace()) {
-        CHECK(!seen_zygote);
-        CHECK(!seen_alloc);
-      } else if (space->IsZygoteSpace()) {
-        CHECK(!seen_alloc);
-        seen_zygote = true;
-      } else if (space->IsMallocSpace()) {
-        seen_alloc = true;
-      }
-    }
   } else {
     DCHECK(space->IsDiscontinuousSpace());
     space::DiscontinuousSpace* discontinuous_space = space->AsDiscontinuousSpace();
@@ -411,6 +429,47 @@
   }
 }
 
+void Heap::RemoveSpace(space::Space* space) {
+  DCHECK(space != nullptr);
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  if (space->IsContinuousSpace()) {
+    DCHECK(!space->IsDiscontinuousSpace());
+    space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
+    // Continuous spaces don't necessarily have bitmaps.
+    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    if (live_bitmap != nullptr) {
+      DCHECK(mark_bitmap != nullptr);
+      live_bitmap_->RemoveContinuousSpaceBitmap(live_bitmap);
+      mark_bitmap_->RemoveContinuousSpaceBitmap(mark_bitmap);
+    }
+    auto it = std::find(continuous_spaces_.begin(), continuous_spaces_.end(), continuous_space);
+    DCHECK(it != continuous_spaces_.end());
+    continuous_spaces_.erase(it);
+    if (continuous_space == dlmalloc_space_) {
+      dlmalloc_space_ = nullptr;
+    } else if (continuous_space == rosalloc_space_) {
+      rosalloc_space_ = nullptr;
+    }
+  } else {
+    DCHECK(space->IsDiscontinuousSpace());
+    space::DiscontinuousSpace* discontinuous_space = space->AsDiscontinuousSpace();
+    DCHECK(discontinuous_space->GetLiveObjects() != nullptr);
+    live_bitmap_->RemoveDiscontinuousObjectSet(discontinuous_space->GetLiveObjects());
+    DCHECK(discontinuous_space->GetMarkObjects() != nullptr);
+    mark_bitmap_->RemoveDiscontinuousObjectSet(discontinuous_space->GetMarkObjects());
+    auto it = std::find(discontinuous_spaces_.begin(), discontinuous_spaces_.end(),
+                        discontinuous_space);
+    DCHECK(it != discontinuous_spaces_.end());
+    discontinuous_spaces_.erase(it);
+  }
+  if (space->IsAllocSpace()) {
+    auto it = std::find(alloc_spaces_.begin(), alloc_spaces_.end(), space->AsAllocSpace());
+    DCHECK(it != alloc_spaces_.end());
+    alloc_spaces_.erase(it);
+  }
+}
+
 void Heap::RegisterGCAllocation(size_t bytes) {
   if (this != nullptr) {
     gc_memory_overhead_.FetchAndAdd(bytes);
@@ -845,10 +904,9 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
-  DCHECK_LE(freed_bytes, static_cast<size_t>(num_bytes_allocated_));
+void Heap::RecordFree(int64_t freed_objects, int64_t freed_bytes) {
+  DCHECK_LE(freed_bytes, num_bytes_allocated_.Load());
   num_bytes_allocated_.FetchAndSub(freed_bytes);
-
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
     thread_stats->freed_objects += freed_objects;
@@ -864,12 +922,19 @@
                                              size_t alloc_size, size_t* bytes_allocated,
                                              mirror::Class** klass) {
   mirror::Object* ptr = nullptr;
+  bool was_default_allocator = allocator == GetCurrentAllocator();
   DCHECK(klass != nullptr);
   SirtRef<mirror::Class> sirt_klass(self, *klass);
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
   collector::GcType last_gc = WaitForGcToComplete(self);
   if (last_gc != collector::kGcTypeNone) {
+    // If we were the default allocator but the allocator changed while we were suspended,
+    // abort the allocation.
+    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+      *klass = sirt_klass.get();
+      return nullptr;
+    }
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
     ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated);
   }
@@ -880,7 +945,13 @@
       break;
     }
     // Attempt to run the collector, if we succeed, re-try the allocation.
-    if (CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone) {
+    bool gc_ran =
+        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
+    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+      *klass = sirt_klass.get();
+      return nullptr;
+    }
+    if (gc_ran) {
       // Did we free sufficient memory for the allocation to succeed?
       ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated);
     }
@@ -901,6 +972,10 @@
     // We don't need a WaitForGcToComplete here either.
     DCHECK(!gc_plan_.empty());
     CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true);
+    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+      *klass = sirt_klass.get();
+      return nullptr;
+    }
     ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated);
     if (ptr == nullptr) {
       ThrowOutOfMemoryError(self, alloc_size, false);
@@ -1065,6 +1140,92 @@
   CollectGarbageInternal(gc_plan_.back(), kGcCauseExplicit, clear_soft_references);
 }
 
+void Heap::TransitionCollector(CollectorType collector_type) {
+  if (collector_type == collector_type_) {
+    return;
+  }
+  uint64_t start_time = NanoTime();
+  int32_t before_size = GetTotalMemory();
+  int32_t before_allocated = num_bytes_allocated_.Load();
+  ThreadList* tl = Runtime::Current()->GetThreadList();
+  Thread* self = Thread::Current();
+  ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
+  Locks::mutator_lock_->AssertNotHeld(self);
+  // Busy wait until we can GC (StartGC can fail if we have a non-zero gc_disable_count_; this
+  // rarely occurs, however).
+  while (!StartGC(self)) {
+    usleep(100);
+  }
+  tl->SuspendAll();
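+  // All mutator threads are suspended for the duration of the space transition.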
+  switch (collector_type) {
+    case kCollectorTypeSS: {
+      mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
+      space::MallocSpace* main_space;
+      if (rosalloc_space_ != nullptr) {
+        DCHECK(kUseRosAlloc);
+        main_space = rosalloc_space_;
+      } else {
+        DCHECK(dlmalloc_space_ != nullptr);
+        main_space = dlmalloc_space_;
+      }
+      Compact(temp_space_, main_space);
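+      // Hold on to the malloc space's mem map (with its pages released) so that a malloc space
+      // can be recreated from it when transitioning back to a mark sweep collector.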
+      DCHECK(allocator_mem_map_.get() == nullptr);
+      allocator_mem_map_.reset(main_space->ReleaseMemMap());
+      madvise(main_space->Begin(), main_space->Size(), MADV_DONTNEED);
+      RemoveSpace(main_space);
+      break;
+    }
+    case kCollectorTypeMS:
+      // Fall through.
+    case kCollectorTypeCMS: {
+      if (collector_type_ == kCollectorTypeSS) {
+        // TODO: Use mem-map from temp space?
+        MemMap* mem_map = allocator_mem_map_.release();
+        CHECK(mem_map != nullptr);
+        size_t initial_size = kDefaultInitialSize;
+        mprotect(mem_map->Begin(), initial_size, PROT_READ | PROT_WRITE);
+        space::MallocSpace* malloc_space;
+        if (kUseRosAlloc) {
+          malloc_space =
+              space::RosAllocSpace::CreateFromMemMap(mem_map, "alloc space", kPageSize,
+                                                     initial_size, mem_map->Size(),
+                                                     mem_map->Size(), low_memory_mode_);
+        } else {
+          malloc_space =
+              space::DlMallocSpace::CreateFromMemMap(mem_map, "alloc space", kPageSize,
+                                                     initial_size, mem_map->Size(),
+                                                     mem_map->Size());
+        }
+        malloc_space->SetFootprintLimit(malloc_space->Capacity());
+        AddSpace(malloc_space);
+        Compact(malloc_space, bump_pointer_space_);
+      }
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Attempted to transition to invalid collector type";
+      break;
+    }
+  }
+  ChangeCollector(collector_type);
+  tl->ResumeAll();
+  // Can't call into java code with all threads suspended.
+  EnqueueClearedReferences();
+  uint64_t duration = NanoTime() - start_time;
+  GrowForUtilization(collector::kGcTypeFull, duration);
+  FinishGC(self, collector::kGcTypeFull);
+  int32_t after_size = GetTotalMemory();
+  int32_t delta_size = before_size - after_size;
+  int32_t after_allocated = num_bytes_allocated_.Load();
+  int32_t delta_allocated = before_allocated - after_allocated;
+  const std::string saved_bytes_str =
+      delta_size < 0 ? "-" + PrettySize(-delta_size) : PrettySize(delta_size);
+  LOG(INFO) << "Heap transition to " << process_state_ << " took "
+      << PrettyDuration(duration) << " " << PrettySize(before_size) << "->"
+      << PrettySize(after_size) << ", " << PrettySize(delta_allocated)
+      << " fewer bytes allocated, " << saved_bytes_str << " saved";
+}
+
 void Heap::ChangeCollector(CollectorType collector_type) {
   // TODO: Only do this with all mutators suspended to avoid races.
   if (collector_type != collector_type_) {
@@ -1086,7 +1247,7 @@
         gc_plan_.push_back(collector::kGcTypeSticky);
         gc_plan_.push_back(collector::kGcTypePartial);
         gc_plan_.push_back(collector::kGcTypeFull);
-        ChangeAllocator(kAllocatorTypeFreeList);
+        ChangeAllocator(kUseRosAlloc ? kAllocatorTypeRosAlloc : kAllocatorTypeDlMalloc);
         break;
       }
       case kCollectorTypeCMS: {
@@ -1094,7 +1255,7 @@
         gc_plan_.push_back(collector::kGcTypeSticky);
         gc_plan_.push_back(collector::kGcTypePartial);
         gc_plan_.push_back(collector::kGcTypeFull);
-        ChangeAllocator(kAllocatorTypeFreeList);
+        ChangeAllocator(kUseRosAlloc ? kAllocatorTypeRosAlloc : kAllocatorTypeDlMalloc);
         break;
       }
       default: {
@@ -1123,7 +1284,6 @@
     return;
   }
   VLOG(heap) << "Starting PreZygoteFork";
-  // Do this before acquiring the zygote creation lock so that we don't get lock order violations.
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
   // Trim the pages at the end of the non moving space.
   non_moving_space_->Trim();
@@ -1152,7 +1312,13 @@
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
   // the remaining available heap memory.
   space::MallocSpace* zygote_space = non_moving_space_;
-  non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space", low_memory_mode_);
+  non_moving_space_ = non_moving_space_->CreateZygoteSpace("alloc space", low_memory_mode_);
+  if (non_moving_space_->IsRosAllocSpace()) {
+    rosalloc_space_ = non_moving_space_->AsRosAllocSpace();
+  } else if (non_moving_space_->IsDlMallocSpace()) {
+    dlmalloc_space_ = non_moving_space_->AsDlMallocSpace();
+  }
+  // Can't use RosAlloc for non moving space due to thread local buffers.
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   // Change the GC retention policy of the zygote space to only collect when full.
   zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
@@ -1168,22 +1334,35 @@
   for (const auto& collector : garbage_collectors_) {
     collector->ResetCumulativeStatistics();
   }
+  // TODO: Should space for non-movable objects be unlimited?
+  space::MallocSpace* new_non_moving_space
+      = space::DlMallocSpace::Create("Non moving dlmalloc space", 2 * MB, 64 * MB, 64 * MB,
+                                     nullptr);
+  CHECK(new_non_moving_space != nullptr) << "Failed to create new non-moving space";
+  AddSpace(new_non_moving_space, false);
+  new_non_moving_space->SetFootprintLimit(new_non_moving_space->Capacity());
+  non_moving_space_ = new_non_moving_space;
 }
 
 void Heap::FlushAllocStack() {
-  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(),
-                 allocation_stack_.get());
+  MarkAllocStackAsLive(allocation_stack_.get());
   allocation_stack_->Reset();
 }
 
-void Heap::MarkAllocStack(accounting::SpaceBitmap* bitmap, accounting::SpaceSetMap* large_objects,
+void Heap::MarkAllocStack(accounting::SpaceBitmap* bitmap1,
+                          accounting::SpaceBitmap* bitmap2,
+                          accounting::SpaceSetMap* large_objects,
                           accounting::ObjectStack* stack) {
+  DCHECK(bitmap1 != nullptr);
+  DCHECK(bitmap2 != nullptr);
   mirror::Object** limit = stack->End();
   for (mirror::Object** it = stack->Begin(); it != limit; ++it) {
     const mirror::Object* obj = *it;
-    DCHECK(obj != NULL);
-    if (LIKELY(bitmap->HasAddress(obj))) {
-      bitmap->Set(obj);
+    DCHECK(obj != nullptr);
+    if (bitmap1->HasAddress(obj)) {
+      bitmap1->Set(obj);
+    } else if (bitmap2->HasAddress(obj)) {
+      bitmap2->Set(obj);
     } else {
       large_objects->Set(obj);
     }
@@ -1223,14 +1402,6 @@
   Runtime* runtime = Runtime::Current();
   // If the heap can't run the GC, silently fail and return that no GC was run.
   switch (gc_type) {
-    case collector::kGcTypeSticky: {
-      const size_t alloc_space_size = non_moving_space_->Size();
-      if (alloc_space_size < min_alloc_space_size_for_sticky_gc_ ||
-        non_moving_space_->Capacity() - alloc_space_size < min_remaining_space_for_sticky_gc_) {
-        return collector::kGcTypeNone;
-      }
-      break;
-    }
     case collector::kGcTypePartial: {
       if (!have_zygote_space_) {
         return collector::kGcTypeNone;
@@ -1247,19 +1418,9 @@
   if (self->IsHandlingStackOverflow()) {
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
   }
-  {
-    gc_complete_lock_->AssertNotHeld(self);
-    MutexLock mu(self, *gc_complete_lock_);
-    // Ensure there is only one GC at a time.
-    WaitForGcToCompleteLocked(self);
-    // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
-    //       Not doing at the moment to ensure soft references are cleared.
-    // GC can be disabled if someone has a used GetPrimitiveArrayCritical.
-    if (gc_disable_count_ != 0) {
-      LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
-      return collector::kGcTypeNone;
-    }
-    is_gc_running_ = true;
+  gc_complete_lock_->AssertNotHeld(self);
+  if (!StartGC(self)) {
+    return collector::kGcTypeNone;
   }
   if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
     ++runtime->GetStats()->gc_for_alloc_count;
@@ -1290,7 +1451,8 @@
     mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
     collector = semi_space_collector_;
     gc_type = collector::kGcTypeFull;
-  } else if (current_allocator_ == kAllocatorTypeFreeList) {
+  } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
+      current_allocator_ == kAllocatorTypeDlMalloc) {
     for (const auto& cur_collector : garbage_collectors_) {
       if (cur_collector->IsConcurrent() == concurrent_gc_ &&
           cur_collector->GetGcType() == gc_type) {
@@ -1312,6 +1474,7 @@
   total_bytes_freed_ever_ += collector->GetFreedBytes();
 
   // Enqueue cleared references.
+  Locks::mutator_lock_->AssertNotHeld(self);
   EnqueueClearedReferences();
 
   // Grow the heap so that we know when to perform the next GC.
@@ -1322,7 +1485,7 @@
     std::vector<uint64_t> pauses = collector->GetPauseTimes();
     // GC for alloc pauses the allocating thread, so consider it as a pause.
     bool was_slow = duration > long_gc_log_threshold_ ||
-            (gc_cause == kGcCauseForAlloc && duration > long_pause_log_threshold_);
+        (gc_cause == kGcCauseForAlloc && duration > long_pause_log_threshold_);
     if (!was_slow) {
       for (uint64_t pause : pauses) {
         was_slow = was_slow || pause > long_pause_log_threshold_;
@@ -1350,15 +1513,7 @@
         }
     }
   }
-
-  {
-      MutexLock mu(self, *gc_complete_lock_);
-      is_gc_running_ = false;
-      last_gc_type_ = gc_type;
-      // Wake anyone who may have been waiting for the GC to complete.
-      gc_complete_cond_->Broadcast(self);
-  }
-
+  FinishGC(self, gc_type);
   ATRACE_END();
 
   // Inform DDMS that a GC completed.
@@ -1366,6 +1521,29 @@
   return gc_type;
 }
 
+bool Heap::StartGC(Thread* self) {
+  MutexLock mu(self, *gc_complete_lock_);
+  // Ensure there is only one GC at a time.
+  WaitForGcToCompleteLocked(self);
+  // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
+  //       Not doing at the moment to ensure soft references are cleared.
+  // GC can be disabled if someone has used GetPrimitiveArrayCritical.
+  if (gc_disable_count_ != 0) {
+    LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
+    return false;
+  }
+  is_gc_running_ = true;
+  return true;
+}
+
+void Heap::FinishGC(Thread* self, collector::GcType gc_type) {
+  MutexLock mu(self, *gc_complete_lock_);
+  is_gc_running_ = false;
+  last_gc_type_ = gc_type;
+  // Wake anyone who may have been waiting for the GC to complete.
+  gc_complete_cond_->Broadcast(self);
+}
+
 static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg) {
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
   if (root == obj) {
@@ -2046,14 +2224,18 @@
 }
 
 void Heap::RevokeThreadLocalBuffers(Thread* thread) {
-  non_moving_space_->RevokeThreadLocalBuffers(thread);
+  if (rosalloc_space_ != nullptr) {
+    rosalloc_space_->RevokeThreadLocalBuffers(thread);
+  }
   if (bump_pointer_space_ != nullptr) {
     bump_pointer_space_->RevokeThreadLocalBuffers(thread);
   }
 }
 
 void Heap::RevokeAllThreadLocalBuffers() {
-  non_moving_space_->RevokeAllThreadLocalBuffers();
+  if (rosalloc_space_ != nullptr) {
+    rosalloc_space_->RevokeAllThreadLocalBuffers();
+  }
   if (bump_pointer_space_ != nullptr) {
     bump_pointer_space_->RevokeAllThreadLocalBuffers();
   }
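
The StartGC()/FinishGC() helpers introduced in heap.cc above bracket every collection: StartGC waits under gc_complete_lock_ for any in-flight GC, backs off while gc_disable_count_ is non-zero, and marks the GC as running; FinishGC clears the flag and broadcasts gc_complete_cond_. A minimal sketch of that handshake, using standard C++ primitives and hypothetical names rather than the ART Mutex/ConditionVariable API:

#include <condition_variable>
#include <mutex>

// Sketch only: at most one collection runs at a time, and a non-zero disable
// count makes Start() back off so the caller can retry (TransitionCollector
// busy-waits on exactly this condition).
class GcGate {
 public:
  bool Start() {
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !gc_running_; });  // WaitForGcToCompleteLocked analogue.
    if (disable_count_ != 0) {
      return false;
    }
    gc_running_ = true;
    return true;
  }

  void Finish() {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      gc_running_ = false;
    }
    cond_.notify_all();  // Wake anyone waiting for the GC to complete.
  }

 private:
  std::mutex mutex_;
  std::condition_variable cond_;
  bool gc_running_ = false;
  int disable_count_ = 0;
};
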
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 832d5ec..1b221fa 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -90,10 +90,12 @@
 
 // Different types of allocators.
 enum AllocatorType {
-  kAllocatorTypeBumpPointer,
-  kAllocatorTypeTLAB,
-  kAllocatorTypeFreeList,  // ROSAlloc / dlmalloc
-  kAllocatorTypeLOS,  // Large object space.
+  kAllocatorTypeBumpPointer,  // Use BumpPointer allocator, has entrypoints.
+  kAllocatorTypeTLAB,  // Use TLAB allocator, has entrypoints.
+  kAllocatorTypeRosAlloc,  // Use RosAlloc allocator, has entrypoints.
+  kAllocatorTypeDlMalloc,  // Use dlmalloc allocator, has entrypoints.
+  kAllocatorTypeNonMoving,  // Special allocator for non moving objects, doesn't have entrypoints.
+  kAllocatorTypeLOS,  // Large object space, also doesn't have entrypoints.
 };
 
 // What caused the GC?
@@ -126,6 +128,7 @@
   kProcessStateJankPerceptible = 0,
   kProcessStateJankImperceptible = 1,
 };
+std::ostream& operator<<(std::ostream& os, const ProcessState& process_state);
 
 class Heap {
  public:
@@ -153,7 +156,8 @@
   // ImageWriter output.
   explicit Heap(size_t initial_size, size_t growth_limit, size_t min_free,
                 size_t max_free, double target_utilization, size_t capacity,
-                const std::string& original_image_file_name, CollectorType collector_type_,
+                const std::string& original_image_file_name,
+                CollectorType post_zygote_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
                 bool ignore_max_footprint, bool use_tlab);
@@ -162,14 +166,13 @@
 
   // Allocates and initializes storage for an object instance.
   template <bool kInstrumented>
-  inline mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+  mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
                                                          GetCurrentAllocator());
   }
   template <bool kInstrumented>
-  inline mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass,
-                                               size_t num_bytes)
+  mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
                                                          GetCurrentNonMovingAllocator());
@@ -204,6 +207,9 @@
   // Change the allocator, updates entrypoints.
   void ChangeAllocator(AllocatorType allocator);
 
+  // Transition the garbage collector during runtime, may copy objects from one space to another.
+  void TransitionCollector(CollectorType collector_type);
+
   // Change the collector to be one of the possible options (MS, CMS, SS).
   void ChangeCollector(CollectorType collector_type);
 
@@ -358,11 +364,14 @@
     return low_memory_mode_;
   }
 
-  void RecordFree(size_t freed_objects, size_t freed_bytes);
+  // Freed bytes can be negative in cases where we copy objects from a compacted space to a
+  // free-list backed space.
+  void RecordFree(int64_t freed_objects, int64_t freed_bytes);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
-  void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/, const mirror::Object* /*new_value*/) {
+  void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/,
+                         const mirror::Object* /*new_value*/) {
     card_table_->MarkCard(dst);
   }
 
@@ -458,8 +467,8 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Mark all the objects in the allocation stack in the specified bitmap.
-  void MarkAllocStack(accounting::SpaceBitmap* bitmap, accounting::SpaceSetMap* large_objects,
-                      accounting::ObjectStack* stack)
+  void MarkAllocStack(accounting::SpaceBitmap* bitmap1, accounting::SpaceBitmap* bitmap2,
+                      accounting::SpaceSetMap* large_objects, accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Mark the specified allocation stack as live.
@@ -470,6 +479,14 @@
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
 
+  space::DlMallocSpace* GetDlMallocSpace() const {
+    return dlmalloc_space_;
+  }
+
+  space::RosAllocSpace* GetRosAllocSpace() const {
+    return rosalloc_space_;
+  }
+
   space::MallocSpace* GetNonMovingSpace() const {
     return non_moving_space_;
   }
@@ -510,6 +527,9 @@
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
                space::ContinuousMemMapAllocSpace* source_space);
 
+  bool StartGC(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
+  void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
+
   static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
     return
         allocator_type != kAllocatorTypeBumpPointer &&
@@ -614,7 +634,9 @@
 
   size_t GetPercentFree();
 
-  void AddSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void AddSpace(space::Space* space, bool set_as_default = true)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void RemoveSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   // No thread saftey analysis since we call this everywhere and it is impossible to find a proper
   // lock ordering for it.
@@ -642,6 +664,12 @@
   // Classes, ArtMethods, ArtFields, and non moving objects.
   space::MallocSpace* non_moving_space_;
 
+  // Space which we use for the kAllocatorTypeRosAlloc.
+  space::RosAllocSpace* rosalloc_space_;
+
+  // Space which we use for the kAllocatorTypeDlMalloc.
+  space::DlMallocSpace* dlmalloc_space_;
+
   // The large object space we are currently allocating into.
   space::LargeObjectSpace* large_object_space_;
 
@@ -651,6 +679,10 @@
   // A mod-union table remembers all of the references from the it's space to other spaces.
   SafeMap<space::Space*, accounting::ModUnionTable*> mod_union_tables_;
 
+  // Keep the free list allocator mem map lying around when we transition to background so that we
+  // don't have to worry about virtual address space fragmentation.
+  UniquePtr<MemMap> allocator_mem_map_;
+
   // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
   // sweep GC, false for other GC types.
   bool concurrent_gc_;
@@ -659,6 +691,8 @@
   CollectorType collector_type_;
   // Which collector we will switch to after zygote fork.
   CollectorType post_zygote_collector_type_;
+  // Which collector we will use when the app is notified of a transition to background.
+  CollectorType background_collector_type_;
 
   // How many GC threads we may use for paused parts of garbage collection.
   const size_t parallel_gc_threads_;
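
The allocator_mem_map_ field declared above is what lets TransitionCollector park the free-list space while the semi-space collector is active: the backing pages are dropped but the reservation is kept, so the address range cannot be fragmented before the heap transitions back and rebuilds a malloc space in place. A rough sketch of that idea with raw POSIX calls (hypothetical wrapper, not the ART MemMap class):

#include <sys/mman.h>
#include <cstddef>

// Sketch: keep an anonymous mapping reserved across a collector transition.
// DropPages() returns the physical pages (MADV_DONTNEED) but keeps the virtual
// range; Reuse() re-enables access to the initial portion before handing the
// range back to a new malloc space.
class ParkedReservation {
 public:
  ParkedReservation(void* begin, size_t size) : begin_(begin), size_(size) {}
  void DropPages() { madvise(begin_, size_, MADV_DONTNEED); }
  void Reuse(size_t initial_size) { mprotect(begin_, initial_size, PROT_READ | PROT_WRITE); }
  void* begin() const { return begin_; }
  size_t size() const { return size_; }

 private:
  void* const begin_;
  const size_t size_;
};
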
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index d5bc667..4dc17df 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -209,17 +209,17 @@
 void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) {
   objects_allocated_.FetchAndAdd(thread->thread_local_objects_);
   bytes_allocated_.FetchAndAdd(thread->thread_local_pos_ - thread->thread_local_start_);
-  thread->SetTLAB(nullptr, nullptr);
+  thread->SetTlab(nullptr, nullptr);
 }
 
-bool BumpPointerSpace::AllocNewTLAB(Thread* self, size_t bytes) {
+bool BumpPointerSpace::AllocNewTlab(Thread* self, size_t bytes) {
   MutexLock mu(Thread::Current(), block_lock_);
   RevokeThreadLocalBuffersLocked(self);
   byte* start = AllocBlock(bytes);
   if (start == nullptr) {
     return false;
   }
-  self->SetTLAB(start, start + bytes);
+  self->SetTlab(start, start + bytes);
   return true;
 }
 
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 0a4be8a..3e25b6b 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -90,12 +90,13 @@
   }
 
   // Clear the memory and reset the pointer to the start of the space.
-  void Clear();
+  void Clear() LOCKS_EXCLUDED(block_lock_);
 
   void Dump(std::ostream& os) const;
 
-  void RevokeThreadLocalBuffers(Thread* thread);
-  void RevokeAllThreadLocalBuffers();
+  void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_);
+  void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
+                                                    Locks::thread_list_lock_);
 
   uint64_t GetBytesAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint64_t GetObjectsAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -114,7 +115,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Allocate a new TLAB, returns false if the allocation failed.
-  bool AllocNewTLAB(Thread* self, size_t bytes);
+  bool AllocNewTlab(Thread* self, size_t bytes);
 
   virtual BumpPointerSpace* AsBumpPointerSpace() {
     return this;
@@ -147,7 +148,7 @@
   byte* growth_end_;
   AtomicInteger objects_allocated_;  // Accumulated from revoked thread local regions.
   AtomicInteger bytes_allocated_;  // Accumulated from revoked thread local regions.
-  Mutex block_lock_;
+  Mutex block_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   // The number of blocks in the space, if it is 0 then the space has one long continuous block
   // which doesn't have an updated header.
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index a4e6eda..981af53 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -42,34 +42,15 @@
   CHECK(mspace != NULL);
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                                     size_t capacity, byte* requested_begin) {
-  uint64_t start_time = 0;
-  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    start_time = NanoTime();
-    VLOG(startup) << "DlMallocSpace::Create entering " << name
-                  << " initial_size=" << PrettySize(initial_size)
-                  << " growth_limit=" << PrettySize(growth_limit)
-                  << " capacity=" << PrettySize(capacity)
-                  << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
-  }
-
-  // Memory we promise to dlmalloc before it asks for morecore.
-  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
-  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
-  // size of the large allocation) will be greater than the footprint limit.
-  size_t starting_size = kPageSize;
-  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
-                                 requested_begin);
-  if (mem_map == NULL) {
-    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
-               << PrettySize(capacity);
-    return NULL;
-  }
+DlMallocSpace* DlMallocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                               size_t starting_size,
+                                               size_t initial_size, size_t growth_limit,
+                                               size_t capacity) {
+  DCHECK(mem_map != nullptr);
   void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
-  if (mspace == NULL) {
+  if (mspace == nullptr) {
     LOG(ERROR) << "Failed to initialize mspace for alloc space (" << name << ")";
-    return NULL;
+    return nullptr;
   }
 
   // Protect memory beyond the initial size.
@@ -79,14 +60,41 @@
   }
 
   // Everything is set so record in immutable structure and leave
-  DlMallocSpace* space;
   byte* begin = mem_map->Begin();
   if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindMallocSpace<DlMallocSpace, void*>(
+    return new ValgrindMallocSpace<DlMallocSpace, void*>(
         name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size);
   } else {
-    space = new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
+    return new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
   }
+}
+
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
+  uint64_t start_time = 0;
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    start_time = NanoTime();
+    LOG(INFO) << "DlMallocSpace::Create entering " << name
+        << " initial_size=" << PrettySize(initial_size)
+        << " growth_limit=" << PrettySize(growth_limit)
+        << " capacity=" << PrettySize(capacity)
+        << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
+  }
+
+  // Memory we promise to dlmalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == nullptr) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
+    return nullptr;
+  }
+  DlMallocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
+                                          growth_limit, capacity);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -102,7 +110,7 @@
   // morecore_start. Don't use an internal dlmalloc lock (as we already hold heap lock). When
   // morecore_start bytes of memory is exhaused morecore will be called.
   void* msp = create_mspace_with_base(begin, morecore_start, false /*locked*/);
-  if (msp != NULL) {
+  if (msp != nullptr) {
     // Do not allow morecore requests to succeed beyond the initial size of the heap
     mspace_set_footprint_limit(msp, initial_size);
   } else {
@@ -202,9 +210,22 @@
 // Callback from dlmalloc when it needs to increase the footprint
 extern "C" void* art_heap_morecore(void* mspace, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK(heap->GetNonMovingSpace()->IsDlMallocSpace());
-  DCHECK_EQ(heap->GetNonMovingSpace()->AsDlMallocSpace()->GetMspace(), mspace);
-  return heap->GetNonMovingSpace()->MoreCore(increment);
+  DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace();
+  // Support for multiple DlMalloc spaces is provided by a slow path.
+  if (UNLIKELY(dlmalloc_space == nullptr || dlmalloc_space->GetMspace() != mspace)) {
+    dlmalloc_space = nullptr;
+    for (space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
+      if (space->IsDlMallocSpace()) {
+        DlMallocSpace* cur_dlmalloc_space = space->AsDlMallocSpace();
+        if (cur_dlmalloc_space->GetMspace() == mspace) {
+          dlmalloc_space = cur_dlmalloc_space;
+          break;
+        }
+      }
+    }
+    CHECK(dlmalloc_space != nullptr) << "Couldn't find DlMallocSpace with mspace=" << mspace;
+  }
+  return dlmalloc_space->MoreCore(increment);
 }
 
 size_t DlMallocSpace::AllocationSize(const mirror::Object* obj) {
@@ -265,6 +286,12 @@
   return objects_allocated;
 }
 
+void DlMallocSpace::Clear() {
+  madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
+  GetLiveBitmap()->Clear();
+  GetMarkBitmap()->Clear();
+}
+
 #ifndef NDEBUG
 void DlMallocSpace::CheckMoreCoreForPrecondition() {
   lock_.AssertHeld(Thread::Current());
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 73e65d4..671d2b2 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -33,6 +33,11 @@
 // An alloc space is a space where objects may be allocated and garbage collected.
 class DlMallocSpace : public MallocSpace {
  public:
+  // Create a DlMallocSpace from an existing mem_map.
+  static DlMallocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                         size_t starting_size, size_t initial_size,
+                                         size_t growth_limit, size_t capacity);
+
   // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
   // the caller should call Begin on the returned space to confirm the
@@ -90,6 +95,8 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
+  virtual void Clear();
+
   virtual void InvalidateAllocator() {
     mspace_for_alloc_ = nullptr;
   }
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 46df0a1..31d878c 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -16,7 +16,8 @@
 
 #include "malloc_space.h"
 
-#include "gc/accounting/card_table.h"
+#include "gc/accounting/card_table-inl.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -80,10 +81,9 @@
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
                                          PROT_READ | PROT_WRITE, &error_msg);
-  if (mem_map == NULL) {
+  if (mem_map == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
                << PrettySize(*capacity) << ": " << error_msg;
-    return NULL;
   }
   return mem_map;
 }
@@ -189,9 +189,6 @@
   size_t size = RoundUp(Size(), kPageSize);
   // Trim the heap so that we minimize the size of the Zygote space.
   Trim();
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
   // Remaining size is for the new alloc space.
   const size_t growth_limit = growth_limit_ - size;
   const size_t capacity = Capacity() - size;
@@ -202,6 +199,10 @@
              << "Capacity " << Capacity();
   SetGrowthLimit(RoundUp(size, kPageSize));
   SetFootprintLimit(RoundUp(size, kPageSize));
+
+  // TODO: Not hardcode these in?
+  const size_t starting_size = kPageSize;
+  const size_t initial_size = 2 * MB;
   // FIXME: Do we need reference counted pointers here?
   // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
   VLOG(heap) << "Creating new AllocSpace: ";
@@ -238,6 +239,83 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
+struct SweepCallbackContext {
+  bool swap_bitmaps;
+  Heap* heap;
+  space::MallocSpace* space;
+  Thread* self;
+  size_t freed_objects;
+  size_t freed_bytes;
+};
+
+static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  space::AllocSpace* space = context->space;
+  Thread* self = context->self;
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
+  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
+  // the bitmaps as an optimization.
+  if (!context->swap_bitmaps) {
+    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      bitmap->Clear(ptrs[i]);
+    }
+  }
+  // Use a bulk free (which merges consecutive objects before freeing) or free per object?
+  // Documentation suggests better free performance with merging, but this may be at the expense
+  // of allocation performance.
+  context->freed_objects += num_ptrs;
+  context->freed_bytes += space->FreeList(self, num_ptrs, ptrs);
+}
+
+static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
+  accounting::CardTable* card_table = context->heap->GetCardTable();
+  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
+  // the bitmaps as an optimization.
+  if (!context->swap_bitmaps) {
+    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      bitmap->Clear(ptrs[i]);
+    }
+  }
+  // We don't free any actual memory to avoid dirtying the shared zygote pages.
+  for (size_t i = 0; i < num_ptrs; ++i) {
+    // Need to mark the card since this will update the mod-union table next GC cycle.
+    card_table->MarkCard(ptrs[i]);
+  }
+}
+
+void MallocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
+  DCHECK(freed_objects != nullptr);
+  DCHECK(freed_bytes != nullptr);
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
+  // If the bitmaps are bound then sweeping this space clearly won't do anything.
+  if (live_bitmap == mark_bitmap) {
+    return;
+  }
+  SweepCallbackContext scc;
+  scc.swap_bitmaps = swap_bitmaps;
+  scc.heap = Runtime::Current()->GetHeap();
+  scc.self = Thread::Current();
+  scc.space = this;
+  scc.freed_objects = 0;
+  scc.freed_bytes = 0;
+  if (swap_bitmaps) {
+    std::swap(live_bitmap, mark_bitmap);
+  }
+  // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
+  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
+                                     reinterpret_cast<uintptr_t>(Begin()),
+                                     reinterpret_cast<uintptr_t>(End()),
+                                     IsZygoteSpace() ? &ZygoteSweepCallback : &SweepCallback,
+                                     reinterpret_cast<void*>(&scc));
+  *freed_objects += scc.freed_objects;
+  *freed_bytes += scc.freed_bytes;
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
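
MallocSpace::Sweep() above visits objects that are set in the live bitmap but clear in the mark bitmap and hands them to SweepCallback (or ZygoteSweepCallback) in batches. A toy version of that walk over plain bitsets, illustrative only and nothing like the real SpaceBitmap::SweepWalk signature:

#include <cstddef>
#include <vector>

// Toy sweep: every index set in `live` but clear in `mark` is dead; dead
// indices are batched and passed to the callback, mirroring how SweepWalk
// feeds SweepCallback with arrays of object pointers.
template <typename Callback>
void ToySweep(const std::vector<bool>& live, const std::vector<bool>& mark,
              size_t batch_size, Callback&& callback) {
  std::vector<size_t> batch;
  for (size_t i = 0; i < live.size(); ++i) {
    if (live[i] && !mark[i]) {
      batch.push_back(i);
      if (batch.size() == batch_size) {
        callback(batch);
        batch.clear();
      }
    }
  }
  if (!batch.empty()) {
    callback(batch);
  }
}
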
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index d25f9cb..7681b6d 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -148,6 +148,9 @@
   // don't do this we may get heap corruption instead of a segfault at null.
   virtual void InvalidateAllocator() = 0;
 
+  // Sweep the references in the malloc space.
+  void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
+
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
               byte* limit, size_t growth_limit);
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 80fdb6c..e5993f6 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -44,6 +44,36 @@
   CHECK(rosalloc != NULL);
 }
 
+RosAllocSpace* RosAllocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                               size_t starting_size,
+                                               size_t initial_size, size_t growth_limit,
+                                               size_t capacity, bool low_memory_mode) {
+  DCHECK(mem_map != nullptr);
+  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
+                                                 low_memory_mode);
+  if (rosalloc == NULL) {
+    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
+    return NULL;
+  }
+
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
+  }
+
+  // Everything is set so record in immutable structure and leave
+  RosAllocSpace* space;
+  byte* begin = mem_map->Begin();
+  if (RUNNING_ON_VALGRIND > 0) {
+    space = new ValgrindMallocSpace<RosAllocSpace, art::gc::allocator::RosAlloc*>(
+        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
+  } else {
+    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+  }
+  return space;
+}
+
 RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                      size_t capacity, byte* requested_begin, bool low_memory_mode) {
   uint64_t start_time = 0;
@@ -68,28 +98,9 @@
                << PrettySize(capacity);
     return NULL;
   }
-  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
-                                                 low_memory_mode);
-  if (rosalloc == NULL) {
-    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
-    return NULL;
-  }
 
-  // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
-  }
-
-  // Everything is set so record in immutable structure and leave
-  RosAllocSpace* space;
-  byte* begin = mem_map->Begin();
-  if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindMallocSpace<RosAllocSpace, art::gc::allocator::RosAlloc*>(
-        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
-  } else {
-    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
-  }
+  RosAllocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
+                                          growth_limit, capacity, low_memory_mode);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -114,7 +125,7 @@
     rosalloc->SetFootprintLimit(initial_size);
   } else {
     PLOG(ERROR) << "RosAlloc::Create failed";
-    }
+  }
   return rosalloc;
 }
 
@@ -203,9 +214,10 @@
 // Callback from rosalloc when it needs to increase the footprint
 extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK(heap->GetNonMovingSpace()->IsRosAllocSpace());
-  DCHECK_EQ(heap->GetNonMovingSpace()->AsRosAllocSpace()->GetRosAlloc(), rosalloc);
-  return heap->GetNonMovingSpace()->MoreCore(increment);
+  RosAllocSpace* rosalloc_space = heap->GetRosAllocSpace();
+  DCHECK(rosalloc_space != nullptr);
+  DCHECK_EQ(rosalloc_space->GetRosAlloc(), rosalloc);
+  return rosalloc_space->MoreCore(increment);
 }
 
 size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
@@ -299,6 +311,12 @@
   rosalloc_->RevokeAllThreadLocalRuns();
 }
 
+void RosAllocSpace::Clear() {
+  madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
+  GetLiveBitmap()->Clear();
+  GetMarkBitmap()->Clear();
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index b0c07fa..6720976 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -39,6 +39,10 @@
   // request was granted.
   static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                size_t capacity, byte* requested_begin, bool low_memory_mode);
+  static RosAllocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
+                                         size_t starting_size, size_t initial_size,
+                                         size_t growth_limit, size_t capacity,
+                                         bool low_memory_mode);
 
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
@@ -78,6 +82,7 @@
   size_t GetFootprintLimit();
   void SetFootprintLimit(size_t limit);
 
+  virtual void Clear();
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                               byte* begin, byte* end, byte* limit, size_t growth_limit);
 
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index db3aca9..31bbb7b 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -354,6 +354,10 @@
     return mem_map_.get();
   }
 
+  MemMap* ReleaseMemMap() {
+    return mem_map_.release();
+  }
+
  protected:
   MemMapSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end, byte* limit,
               GcRetentionPolicy gc_retention_policy)
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 47c1899..710d9dd 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -413,7 +413,10 @@
   if (enable_instrumentation) {
     // Instrumentation wasn't enabled so enable it.
     SetQuickAllocEntryPointsInstrumented(true);
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
     ResetQuickAllocEntryPoints();
+    tl->ResumeAll();
   }
 }
 
@@ -425,21 +428,18 @@
       quick_alloc_entry_points_instrumentation_counter_.FetchAndSub(1) == 1;
   if (disable_instrumentation) {
     SetQuickAllocEntryPointsInstrumented(false);
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
     ResetQuickAllocEntryPoints();
+    tl->ResumeAll();
   }
 }
 
 void Instrumentation::ResetQuickAllocEntryPoints() {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsStarted()) {
-    ThreadList* tl = runtime->GetThreadList();
-    Thread* self = Thread::Current();
-    tl->SuspendAll();
-    {
-      MutexLock mu(self, *Locks::thread_list_lock_);
-      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
-    }
-    tl->ResumeAll();
+    MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+    runtime->GetThreadList()->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
   }
 }
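
With this change the callers of ResetQuickAllocEntryPoints own the SuspendAll()/ResumeAll() bracket. If that bracket were ever made exception-safe, an RAII guard would be the natural shape; a hypothetical sketch, not a class that exists in this tree:

// Hypothetical guard over any type exposing SuspendAll()/ResumeAll(), e.g. the
// runtime's ThreadList. Construction suspends, destruction resumes.
template <typename ThreadListT>
class ScopedSuspendAll {
 public:
  explicit ScopedSuspendAll(ThreadListT* tl) : tl_(tl) { tl_->SuspendAll(); }
  ~ScopedSuspendAll() { tl_->ResumeAll(); }
  ScopedSuspendAll(const ScopedSuspendAll&) = delete;
  ScopedSuspendAll& operator=(const ScopedSuspendAll&) = delete;

 private:
  ThreadListT* const tl_;
};
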
 
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index c9bf160..088f616 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -73,12 +73,6 @@
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_), false);
 }
 
-inline ObjectArray<StaticStorageBase>* ArtMethod::GetDexCacheInitializedStaticStorage() const {
-  return GetFieldObject<ObjectArray<StaticStorageBase>*>(
-      OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_initialized_static_storage_),
-      false);
-}
-
 inline uint32_t ArtMethod::GetCodeSize() const {
   DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
   uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromCompiledCode());
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index a4f6b3b..f4a076c 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -86,11 +86,6 @@
                  new_dex_cache_classes, false);
 }
 
-void ArtMethod::SetDexCacheInitializedStaticStorage(ObjectArray<StaticStorageBase>* new_value) {
-  SetFieldObject(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_initialized_static_storage_),
-      new_value, false);
-}
-
 size_t ArtMethod::NumArgRegisters(const StringPiece& shorty) {
   CHECK_LE(1, shorty.length());
   uint32_t num_registers = 0;
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index d5524ec..963b4d5 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -184,11 +184,6 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_);
   }
 
-  static MemberOffset DexCacheInitializedStaticStorageOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod,
-        dex_cache_initialized_static_storage_);
-  }
-
   ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() const;
   void SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -197,10 +192,6 @@
   void SetDexCacheResolvedTypes(ObjectArray<Class>* new_dex_cache_types)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<StaticStorageBase>* GetDexCacheInitializedStaticStorage() const;
-  void SetDexCacheInitializedStaticStorage(ObjectArray<StaticStorageBase>* new_value)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Find the method that this method overrides
   ArtMethod* FindOverriddenMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -391,9 +382,6 @@
   Class* declaring_class_;
 
   // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
-  ObjectArray<StaticStorageBase>* dex_cache_initialized_static_storage_;
-
-  // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
   ObjectArray<ArtMethod>* dex_cache_resolved_methods_;
 
   // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 50ede66..9aa23d9 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -70,15 +70,8 @@
 class DexCache;
 class IfTable;
 
-// Type for the InitializedStaticStorage table. Currently the Class
-// provides the static storage. However, this might change to an Array
-// to improve image sharing, so we use this type to avoid assumptions
-// on the current storage.
-class MANAGED StaticStorageBase : public Object {
-};
-
 // C++ mirror of java.lang.Class
-class MANAGED Class : public StaticStorageBase {
+class MANAGED Class : public Object {
  public:
   // Class Status
   //
@@ -133,6 +126,10 @@
 
   void SetStatus(Status new_status, Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static MemberOffset StatusOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, status_);
+  }
+
   // Returns true if the class has failed to link.
   bool IsErroneous() const {
     return GetStatus() == kStatusError;
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 00531e3..fa0900c 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -36,15 +36,13 @@
                     ObjectArray<String>* strings,
                     ObjectArray<Class>* resolved_types,
                     ObjectArray<ArtMethod>* resolved_methods,
-                    ObjectArray<ArtField>* resolved_fields,
-                    ObjectArray<StaticStorageBase>* initialized_static_storage) {
-  CHECK(dex_file != NULL);
-  CHECK(location != NULL);
-  CHECK(strings != NULL);
-  CHECK(resolved_types != NULL);
-  CHECK(resolved_methods != NULL);
-  CHECK(resolved_fields != NULL);
-  CHECK(initialized_static_storage != NULL);
+                    ObjectArray<ArtField>* resolved_fields) {
+  CHECK(dex_file != nullptr);
+  CHECK(location != nullptr);
+  CHECK(strings != nullptr);
+  CHECK(resolved_types != nullptr);
+  CHECK(resolved_methods != nullptr);
+  CHECK(resolved_fields != nullptr);
 
   SetFieldPtr(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), dex_file, false);
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location, false);
@@ -52,8 +50,6 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_types_), resolved_types, false);
   SetFieldObject(ResolvedMethodsOffset(), resolved_methods, false);
   SetFieldObject(ResolvedFieldsOffset(), resolved_fields, false);
-  SetFieldObject(OFFSET_OF_OBJECT_MEMBER(DexCache, initialized_static_storage_),
-                 initialized_static_storage, false);
 
   Runtime* runtime = Runtime::Current();
   if (runtime->HasResolutionMethod()) {
@@ -68,11 +64,11 @@
 
 void DexCache::Fixup(ArtMethod* trampoline) {
   // Fixup the resolve methods array to contain trampoline for resolution.
-  CHECK(trampoline != NULL);
+  CHECK(trampoline != nullptr);
   ObjectArray<ArtMethod>* resolved_methods = GetResolvedMethods();
   size_t length = resolved_methods->GetLength();
   for (size_t i = 0; i < length; i++) {
-    if (resolved_methods->GetWithoutChecks(i) == NULL) {
+    if (resolved_methods->GetWithoutChecks(i) == nullptr) {
       resolved_methods->SetWithoutChecks(i, trampoline);
     }
   }
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 0522f13..a5fe598 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -47,8 +47,7 @@
             ObjectArray<String>* strings,
             ObjectArray<Class>* types,
             ObjectArray<ArtMethod>* methods,
-            ObjectArray<ArtField>* fields,
-            ObjectArray<StaticStorageBase>* initialized_static_storage)
+            ObjectArray<ArtField>* fields)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Fixup(ArtMethod* trampoline) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -85,11 +84,6 @@
     return GetResolvedFields()->GetLength();
   }
 
-  size_t NumInitializedStaticStorage() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetInitializedStaticStorage()->GetLength();
-  }
-
   String* GetResolvedString(uint32_t string_idx) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStrings()->Get(string_idx);
@@ -149,12 +143,6 @@
     return GetFieldObject< ObjectArray<ArtField>* >(ResolvedFieldsOffset(), false);
   }
 
-  ObjectArray<StaticStorageBase>* GetInitializedStaticStorage() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<StaticStorageBase>* >(
-        OFFSET_OF_OBJECT_MEMBER(DexCache, initialized_static_storage_), false);
-  }
-
   const DexFile* GetDexFile() const {
     return GetFieldPtr<const DexFile*>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), false);
   }
@@ -165,7 +153,6 @@
 
  private:
   Object* dex_;
-  ObjectArray<StaticStorageBase>* initialized_static_storage_;
   String* location_;
   ObjectArray<ArtField>* resolved_fields_;
   ObjectArray<ArtMethod>* resolved_methods_;
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 441c6da..6bed224 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -39,13 +39,11 @@
   EXPECT_EQ(java_lang_dex_file_->NumTypeIds(),   dex_cache->NumResolvedTypes());
   EXPECT_EQ(java_lang_dex_file_->NumMethodIds(), dex_cache->NumResolvedMethods());
   EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),  dex_cache->NumResolvedFields());
-  EXPECT_EQ(java_lang_dex_file_->NumTypeIds(),   dex_cache->NumInitializedStaticStorage());
 
   EXPECT_LE(0, dex_cache->GetStrings()->GetLength());
   EXPECT_LE(0, dex_cache->GetResolvedTypes()->GetLength());
   EXPECT_LE(0, dex_cache->GetResolvedMethods()->GetLength());
   EXPECT_LE(0, dex_cache->GetResolvedFields()->GetLength());
-  EXPECT_LE(0, dex_cache->GetInitializedStaticStorage()->GetLength());
 
   EXPECT_EQ(java_lang_dex_file_->NumStringIds(),
             static_cast<uint32_t>(dex_cache->GetStrings()->GetLength()));
@@ -55,8 +53,6 @@
             static_cast<uint32_t>(dex_cache->GetResolvedMethods()->GetLength()));
   EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),
             static_cast<uint32_t>(dex_cache->GetResolvedFields()->GetLength()));
-  EXPECT_EQ(java_lang_dex_file_->NumTypeIds(),
-            static_cast<uint32_t>(dex_cache->GetInitializedStaticStorage()->GetLength()));
 }
 
 }  // namespace mirror
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 7890071..c9e255c 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -234,7 +234,6 @@
     return;
   }
   // LOG(INFO) << "VMRuntime.preloadDexCaches static storage klass=" << class_name;
-  dex_cache->GetInitializedStaticStorage()->Set(type_idx, klass);
 }
 
 // Based on ClassLinker::ResolveField.
@@ -306,12 +305,10 @@
     uint32_t num_types;
     uint32_t num_fields;
     uint32_t num_methods;
-    uint32_t num_static_storage;
     DexCacheStats() : num_strings(0),
                       num_types(0),
                       num_fields(0),
-                      num_methods(0),
-                      num_static_storage(0) {}
+                      num_methods(0) {}
 };
 
 static const bool kPreloadDexCachesEnabled = true;
@@ -339,7 +336,6 @@
     total->num_fields += dex_file->NumFieldIds();
     total->num_methods += dex_file->NumMethodIds();
     total->num_types += dex_file->NumTypeIds();
-    total->num_static_storage += dex_file->NumTypeIds();
   }
 }
 
@@ -378,12 +374,6 @@
         filled->num_methods++;
       }
     }
-    for (size_t i = 0; i < dex_cache->NumInitializedStaticStorage(); i++) {
-      mirror::StaticStorageBase* klass = dex_cache->GetInitializedStaticStorage()->Get(i);
-      if (klass != NULL) {
-        filled->num_static_storage++;
-      }
-    }
   }
 }
 
@@ -477,10 +467,6 @@
                               total.num_fields, before.num_fields, after.num_fields);
     LOG(INFO) << StringPrintf("VMRuntime.preloadDexCaches methods total=%d before=%d after=%d",
                               total.num_methods, before.num_methods, after.num_methods);
-    LOG(INFO) << StringPrintf("VMRuntime.preloadDexCaches storage total=%d before=%d after=%d",
-                              total.num_static_storage,
-                              before.num_static_storage,
-                              after.num_static_storage);
     LOG(INFO) << StringPrintf("VMRuntime.preloadDexCaches finished");
   }
 }
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 52e74ab..caf18f1 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '1', '2', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '1', '3', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index cc996bc..084e1e2 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -63,7 +63,7 @@
 
  private:
   Thread* const self_;
-  const SirtRef<T>* obj_;
+  const SirtRef<T>* const obj_;
   DISALLOW_COPY_AND_ASSIGN(ObjectLock);
 };
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 5a28b2d..91d9b94 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -356,6 +356,25 @@
   GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
 }
 
+static gc::CollectorType ParseCollectorType(const std::string& option) {
+  std::vector<std::string> gc_options;
+  Split(option, ',', gc_options);
+  gc::CollectorType collector_type = gc::kCollectorTypeNone;
+  for (size_t i = 0; i < gc_options.size(); ++i) {
+    if (gc_options[i] == "MS" || gc_options[i] == "nonconcurrent") {
+      collector_type = gc::kCollectorTypeMS;
+    } else if (gc_options[i] == "CMS" || gc_options[i] == "concurrent") {
+      collector_type = gc::kCollectorTypeCMS;
+    } else if (gc_options[i] == "SS") {
+      collector_type = gc::kCollectorTypeSS;
+    } else {
+      LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_options[i];
+      return gc::kCollectorTypeNone;
+    }
+  }
+  return collector_type;
+}
+
 Runtime::ParsedOptions* Runtime::ParsedOptions::Create(const Options& options, bool ignore_unrecognized) {
   UniquePtr<ParsedOptions> parsed(new ParsedOptions());
   const char* boot_class_path_string = getenv("BOOTCLASSPATH");
@@ -381,6 +400,9 @@
   parsed->conc_gc_threads_ = 0;
   // Default is CMS which is Sticky + Partial + Full CMS GC.
   parsed->collector_type_ = gc::kCollectorTypeCMS;
+  // If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after
+  // parsing options.
+  parsed->background_collector_type_ = gc::kCollectorTypeNone;
   parsed->stack_size_ = 0;  // 0 means default.
   parsed->max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   parsed->low_memory_mode_ = false;
@@ -570,18 +592,15 @@
     } else if (option == "-Xint") {
       parsed->interpreter_only_ = true;
     } else if (StartsWith(option, "-Xgc:")) {
-      std::vector<std::string> gc_options;
-      Split(option.substr(strlen("-Xgc:")), ',', gc_options);
-      for (size_t i = 0; i < gc_options.size(); ++i) {
-        if (gc_options[i] == "MS" || gc_options[i] == "nonconcurrent") {
-          parsed->collector_type_ = gc::kCollectorTypeMS;
-        } else if (gc_options[i] == "CMS" || gc_options[i] == "concurrent") {
-          parsed->collector_type_ = gc::kCollectorTypeCMS;
-        } else if (gc_options[i] == "SS") {
-          parsed->collector_type_ = gc::kCollectorTypeSS;
-        } else {
-          LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_options[i];
-        }
+      gc::CollectorType collector_type = ParseCollectorType(option.substr(strlen("-Xgc:")));
+      if (collector_type != gc::kCollectorTypeNone) {
+        parsed->collector_type_ = collector_type;
+      }
+    } else if (StartsWith(option, "-XX:BackgroundGC=")) {
+      gc::CollectorType collector_type = ParseCollectorType(
+          option.substr(strlen("-XX:BackgroundGC=")));
+      if (collector_type != gc::kCollectorTypeNone) {
+        parsed->background_collector_type_ = collector_type;
       }
     } else if (option == "-XX:+DisableExplicitGC") {
       parsed->is_explicit_gc_disabled_ = true;
@@ -708,7 +727,9 @@
   if (parsed->heap_growth_limit_ == 0) {
     parsed->heap_growth_limit_ = parsed->heap_maximum_size_;
   }
-
+  if (parsed->background_collector_type_ == gc::kCollectorTypeNone) {
+    parsed->background_collector_type_ = parsed->collector_type_;
+  }
   return parsed.release();
 }
 
@@ -957,6 +978,7 @@
                        options->heap_maximum_size_,
                        options->image_,
                        options->collector_type_,
+                       options->background_collector_type_,
                        options->parallel_gc_threads_,
                        options->conc_gc_threads_,
                        options->low_memory_mode_,
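
ParseCollectorType() above accepts a comma-separated value, so -Xgc: and the new -XX:BackgroundGC= flag share one parser, with kCollectorTypeNone meaning "leave the default alone". A toy restatement of that mapping (hypothetical standalone function, same token set as the code above):

#include <string>
#include <vector>

enum class CollectorType { kNone, kMS, kCMS, kSS };

// The last recognised token wins; any unknown token makes the whole option a
// no-op, exactly as ParseCollectorType returns kCollectorTypeNone.
CollectorType ParseGcOption(const std::vector<std::string>& tokens) {
  CollectorType result = CollectorType::kNone;
  for (const std::string& token : tokens) {
    if (token == "MS" || token == "nonconcurrent") {
      result = CollectorType::kMS;
    } else if (token == "CMS" || token == "concurrent") {
      result = CollectorType::kCMS;
    } else if (token == "SS") {
      result = CollectorType::kSS;
    } else {
      return CollectorType::kNone;
    }
  }
  return result;
}

Per the parsing above, -Xgc:SS -XX:BackgroundGC=CMS would select the semi-space collector in the foreground and CMS for background transitions, while an unrecognized token leaves the built-in CMS default in place.
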
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 50da0dc..30ab787 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -119,6 +119,7 @@
     size_t parallel_gc_threads_;
     size_t conc_gc_threads_;
     gc::CollectorType collector_type_;
+    gc::CollectorType background_collector_type_;
     size_t stack_size_;
     size_t max_spins_before_thin_lock_inflation_;
     bool low_memory_mode_;
@@ -455,8 +456,9 @@
     return use_compile_time_class_path_;
   }
 
-  void AddMethodVerifier(verifier::MethodVerifier* verifier);
-  void RemoveMethodVerifier(verifier::MethodVerifier* verifier);
+  void AddMethodVerifier(verifier::MethodVerifier* verifier) LOCKS_EXCLUDED(method_verifier_lock_);
+  void RemoveMethodVerifier(verifier::MethodVerifier* verifier)
+      LOCKS_EXCLUDED(method_verifier_lock_);
 
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
@@ -543,7 +545,7 @@
   mirror::ObjectArray<mirror::ArtMethod>* default_imt_;
 
   // Method verifier set, used so that we can update their GC roots.
-  Mutex method_verifiers_lock_;
+  Mutex method_verifiers_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::set<verifier::MethodVerifier*> method_verifiers_;
 
   // A non-zero value indicates that a thread has been created but not yet initialized. Guarded by
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index b87a8ec..9420e7b 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -158,12 +158,12 @@
   }
 }
 
-inline size_t Thread::TLABSize() const {
+inline size_t Thread::TlabSize() const {
   return thread_local_end_ - thread_local_pos_;
 }
 
-inline mirror::Object* Thread::AllocTLAB(size_t bytes) {
-  DCHECK_GE(TLABSize(), bytes);
+inline mirror::Object* Thread::AllocTlab(size_t bytes) {
+  DCHECK_GE(TlabSize(), bytes);
   ++thread_local_objects_;
   mirror::Object* ret = reinterpret_cast<mirror::Object*>(thread_local_pos_);
   thread_local_pos_ += bytes;
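Note: TlabSize()/AllocTlab() implement a plain bump-pointer thread-local allocation buffer; AllocTlab() deliberately performs no bounds check, so callers are expected to consult TlabSize() first. A hypothetical calling pattern (AllocFromTlabSlowPath is a made-up placeholder, not an API introduced by this change):

// Sketch of the expected calling pattern for the renamed TLAB accessors.
mirror::Object* AllocFromTlab(Thread* self, size_t byte_count) {
  if (LIKELY(byte_count <= self->TlabSize())) {
    return self->AllocTlab(byte_count);  // Bump-pointer fast path, no size check inside.
  }
  return AllocFromTlabSlowPath(self, byte_count);  // Placeholder: refill via SetTlab() and retry.
}
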
diff --git a/runtime/thread.cc b/runtime/thread.cc
index e2d51b7..7a09818 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2222,7 +2222,7 @@
   stack_end_ = stack_begin_;
 }
 
-void Thread::SetTLAB(byte* start, byte* end) {
+void Thread::SetTlab(byte* start, byte* end) {
   DCHECK_LE(start, end);
   thread_local_start_ = start;
   thread_local_pos_  = thread_local_start_;
diff --git a/runtime/thread.h b/runtime/thread.h
index 30c7e8f..0810909 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -814,10 +814,10 @@
   byte* thread_local_end_;
   size_t thread_local_objects_;
   // Returns the remaining space in the TLAB.
-  size_t TLABSize() const;
+  size_t TlabSize() const;
   // Doesn't check that there is room.
-  mirror::Object* AllocTLAB(size_t bytes);
-  void SetTLAB(byte* start, byte* end);
+  mirror::Object* AllocTlab(size_t bytes);
+  void SetTlab(byte* start, byte* end);
 
   // Thread-local rosalloc runs. There are 34 size brackets in rosalloc
   // runs (RosAlloc::kNumOfSizeBrackets). We can't refer to the
@@ -828,7 +828,7 @@
   void* rosalloc_runs_[kRosAllocNumOfSizeBrackets];
 
  private:
-  friend class Dbg;  // F or SetStateUnsafe.
+  friend class Dbg;  // For SetStateUnsafe.
   friend class Monitor;
   friend class MonitorInfo;
   friend class Runtime;  // For CreatePeer.
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 2339903..950e3f9 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -438,7 +438,7 @@
   return result;
 }
 
-std::string PrettySize(size_t byte_count) {
+std::string PrettySize(int64_t byte_count) {
   // The byte thresholds at which we display amounts.  A byte count is displayed
   // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
   static const size_t kUnitThresholds[] = {
@@ -447,17 +447,20 @@
     2*1024*1024,    // MB up to...
     1024*1024*1024  // GB from here.
   };
-  static const size_t kBytesPerUnit[] = { 1, KB, MB, GB };
+  static const int64_t kBytesPerUnit[] = { 1, KB, MB, GB };
   static const char* const kUnitStrings[] = { "B", "KB", "MB", "GB" };
-
+  const char* negative_str = "";
+  if (byte_count < 0) {
+    negative_str = "-";
+    byte_count = -byte_count;
+  }
   int i = arraysize(kUnitThresholds);
   while (--i > 0) {
     if (byte_count >= kUnitThresholds[i]) {
       break;
     }
   }
-
-  return StringPrintf("%zd%s", byte_count / kBytesPerUnit[i], kUnitStrings[i]);
+  return StringPrintf("%s%lld%s", negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 }
 
 std::string PrettyDuration(uint64_t nano_duration) {
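Note: widening PrettySize() to int64_t lets callers print signed deltas (for example, how much the heap shrank) instead of wrapping a negative value through size_t. Illustrative results, derived from the thresholds visible above (a sketch, not output captured from this change):

// PrettySize(2 * MB)     -> "2MB"
// PrettySize(-3LL * GB)  -> "-3GB"
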
diff --git a/runtime/utils.h b/runtime/utils.h
index 4b39acd..f063c0a 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -239,7 +239,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable size string such as "1MB".
-std::string PrettySize(size_t size_in_bytes);
+std::string PrettySize(int64_t size_in_bytes);
 
 // Returns a human-readable time string which prints every nanosecond while trying to limit the
 // number of trailing zeros. Prints using the largest human readable unit up to a second.
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 8df481f..4be1d02 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -285,7 +285,7 @@
 
 
   const std::string descriptor_;
-  mirror::Class* klass_;
+  mirror::Class* klass_;  // Non-const only due to moving classes.
   const uint16_t cache_id_;
 
   friend class RegTypeCache;
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index d737ff5..1ce4a04 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -297,6 +297,24 @@
         l = -5678956789L;
         i = (int) l;
         if (i != -1383989493) { return 4; }
+
+        /* long --> double */
+        l = 0x7FFFFFFFL;
+        d = (double) l;
+        if (Double.doubleToRawLongBits(d) != 0x41dfffffffc00000L) { return 5; }
+
+        l = 0xFFFFFFFFL;
+        d = (double) l;
+        if (Double.doubleToRawLongBits(d) != 0x41efffffffe00000L) { return 6; }
+
+        l = 0x7FFFFFFFFFFFFFFFL;
+        d = (double) l;
+        if (Double.doubleToRawLongBits(d) != 0x43e0000000000000L) { return 7; }
+
+        l = 0xFFFFFFFFFFFFFFFFL;
+        d = (double) l;
+        if (Double.doubleToRawLongBits(d) != 0xbff0000000000000L) { return 8; }
+
         return 0;
     }
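Note on the expected bit patterns above: 0x7FFFFFFFFFFFFFFFL (2^63 - 1) is not exactly representable as a double, so the conversion rounds to 2^63 (sign 0, biased exponent 1023 + 63 = 0x43E, zero mantissa), giving 0x43e0000000000000; 0xFFFFFFFFFFFFFFFFL is the signed value -1, and -1.0 encodes as 0xbff0000000000000. A C++ equivalent of the Java check, for reference (helper name is illustrative):

#include <cstdint>
#include <cstring>

// Illustrative stand-in for Double.doubleToRawLongBits().
static uint64_t DoubleToRawBits(double value) {
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}
// e.g. DoubleToRawBits(static_cast<double>(INT64_MAX)) == 0x43e0000000000000
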
 
diff --git a/test/303-verification-stress/expected.txt b/test/303-verification-stress/expected.txt
index cdfd6cb..4fa1b57 100644
--- a/test/303-verification-stress/expected.txt
+++ b/test/303-verification-stress/expected.txt
@@ -9,4 +9,34 @@
 Create new Test007
 Create new Test008
 Create new Test009
+Create new Test010
+Create new Test011
+Create new Test012
+Create new Test013
+Create new Test014
+Create new Test015
+Create new Test016
+Create new Test017
+Create new Test018
+Create new Test019
+Create new Test020
+Create new Test021
+Create new Test022
+Create new Test023
+Create new Test024
+Create new Test025
+Create new Test026
+Create new Test027
+Create new Test028
+Create new Test029
+Create new Test030
+Create new Test031
+Create new Test032
+Create new Test033
+Create new Test034
+Create new Test035
+Create new Test036
+Create new Test037
+Create new Test038
+Create new Test039
 Done