Merge "[optimizing] More x86_64 code improvements"
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index d29b865..4de3410 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -715,6 +715,7 @@
     // Try to find a MOVE to a vreg that wasn't changed since check_change.
     uint16_t value_name =
         data->wide_def ? lvn_->GetSregValueWide(dest_s_reg) : lvn_->GetSregValue(dest_s_reg);
+    uint32_t dest_v_reg = mir_graph_->SRegToVReg(dest_s_reg);
     for (size_t c = check_change + 1u, size = vreg_chains_.NumMIRs(); c != size; ++c) {
       MIRData* d = vreg_chains_.GetMIRData(c);
       if (d->is_move && d->wide_def == data->wide_def &&
@@ -731,8 +732,21 @@
           if (!vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg, mir_graph_) &&
               (!d->wide_def ||
                !vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg + 1, mir_graph_))) {
-            RecordPassKillMoveByRenamingSrcDef(check_change, c);
-            return;
+            // If the move's destination vreg changed, check if the vreg we're trying
+            // to rename is unused after that change.
+            uint16_t dest_change = vreg_chains_.FindFirstChangeAfter(new_dest_v_reg, c);
+            if (d->wide_def) {
+              uint16_t dest_change_high = vreg_chains_.FindFirstChangeAfter(new_dest_v_reg + 1, c);
+              if (dest_change_high != kNPos &&
+                  (dest_change == kNPos || dest_change_high < dest_change)) {
+                dest_change = dest_change_high;
+              }
+            }
+            if (dest_change == kNPos ||
+                !vreg_chains_.IsVRegUsed(dest_change + 1u, size, dest_v_reg, mir_graph_)) {
+              RecordPassKillMoveByRenamingSrcDef(check_change, c);
+              return;
+            }
           }
         }
       }
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index 6ba91b6..4df0a8b 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -1933,6 +1933,78 @@
   }
 }
 
+TEST_F(GvnDeadCodeEliminationTestSimple, LongOverlaps2) {
+  static const MIRDef mirs[] = {
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
+      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u),
+      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 4u, 2u),
+  };
+
+  // The last insn should overlap the first and second.
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 1, 2 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2, 4 };
+  MarkAsWideSRegs(wide_sregs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  static const bool eliminated[] = {
+      false, true, true,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+  // Check that the CONST_WIDE registers have been correctly renamed.
+  MIR* const_wide = &mirs_[0];
+  ASSERT_EQ(2u, const_wide->ssa_rep->num_defs);
+  EXPECT_EQ(4, const_wide->ssa_rep->defs[0]);
+  EXPECT_EQ(5, const_wide->ssa_rep->defs[1]);
+  EXPECT_EQ(1u, const_wide->dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, LongOverlaps3) {
+  static const MIRDef mirs[] = {
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
+      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u),
+      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 4u, 2u),
+  };
+
+  // The last insn should overlap the first and second.
+  static const int32_t sreg_to_vreg_map[] = { 2, 3, 0, 1, 1, 2 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2, 4 };
+  MarkAsWideSRegs(wide_sregs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  static const bool eliminated[] = {
+      false, true, true,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+  // Check that the CONST_WIDE registers have been correctly renamed.
+  MIR* const_wide = &mirs_[0];
+  ASSERT_EQ(2u, const_wide->ssa_rep->num_defs);
+  EXPECT_EQ(4, const_wide->ssa_rep->defs[0]);
+  EXPECT_EQ(5, const_wide->ssa_rep->defs[1]);
+  EXPECT_EQ(1u, const_wide->dalvikInsn.vA);
+}
+
 TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) {
   static const MIRDef mirs[] = {
       DEF_CONST(3, Instruction::CONST, 0u, 1000u),
@@ -2093,4 +2165,37 @@
   }
 }
 
+TEST_F(GvnDeadCodeEliminationTestSimple, Dependancy) {
+  static const MIRDef mirs[] = {
+      DEF_MOVE(3, Instruction::MOVE, 5u, 1u),                 // move v5,v1
+      DEF_MOVE(3, Instruction::MOVE, 6u, 1u),                 // move v12,v1
+      DEF_MOVE(3, Instruction::MOVE, 7u, 0u),                 // move v13,v0
+      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 8u, 2u),       // move v0_1,v2_3
+      DEF_MOVE(3, Instruction::MOVE, 10u, 6u),                // move v3,v12
+      DEF_MOVE(3, Instruction::MOVE, 11u, 4u),                // move v2,v4
+      DEF_MOVE(3, Instruction::MOVE, 12u, 7u),                // move v4,v13
+      DEF_MOVE(3, Instruction::MOVE, 13u, 11u),               // move v12,v2
+      DEF_MOVE(3, Instruction::MOVE, 14u, 10u),               // move v2,v3
+      DEF_MOVE(3, Instruction::MOVE, 15u, 5u),                // move v3,v5
+      DEF_MOVE(3, Instruction::MOVE, 16u, 12u),               // move v5,v4
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 12, 13, 0, 1, 3, 2, 4, 12, 2, 3, 5 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 2, 8 };
+  MarkAsWideSRegs(wide_sregs);
+  PerformGVN_DCE();
+
+  static const bool eliminated[] = {
+      false, false, false, false, false, false, false, true, true, false, false,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index b098bc2..ec4bad7 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -49,9 +49,11 @@
 static constexpr RegStorage core_temps_arr_32[] =
     {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0_32, rs_rT1_32, rs_rT2_32, rs_rT3_32,
      rs_rT4_32, rs_rT5_32, rs_rT6_32, rs_rT7_32, rs_rT8};
-static constexpr RegStorage sp_temps_arr_32[] =
+static constexpr RegStorage sp_fr0_temps_arr_32[] =
     {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
      rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
+static constexpr RegStorage sp_fr1_temps_arr_32[] =
+    {rs_rF0, rs_rF2, rs_rF4, rs_rF6, rs_rF8, rs_rF10, rs_rF12, rs_rF14};
 static constexpr RegStorage dp_fr0_temps_arr_32[] =
     {rs_rD0_fr0, rs_rD1_fr0, rs_rD2_fr0, rs_rD3_fr0, rs_rD4_fr0, rs_rD5_fr0, rs_rD6_fr0,
      rs_rD7_fr0};
@@ -130,7 +132,8 @@
 static constexpr ArrayRef<const RegStorage> dp_fr1_regs_32(dp_fr1_regs_arr_32);
 static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
 static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32);
-static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32);
+static constexpr ArrayRef<const RegStorage> sp_fr0_temps_32(sp_fr0_temps_arr_32);
+static constexpr ArrayRef<const RegStorage> sp_fr1_temps_32(sp_fr1_temps_arr_32);
 static constexpr ArrayRef<const RegStorage> dp_fr0_temps_32(dp_fr0_temps_arr_32);
 static constexpr ArrayRef<const RegStorage> dp_fr1_temps_32(dp_fr1_temps_arr_32);
 
@@ -591,22 +594,22 @@
     Clobber(rs_rFP);
     Clobber(rs_rRA);
     Clobber(rs_rF0);
-    Clobber(rs_rF1);
     Clobber(rs_rF2);
-    Clobber(rs_rF3);
     Clobber(rs_rF4);
-    Clobber(rs_rF5);
     Clobber(rs_rF6);
-    Clobber(rs_rF7);
     Clobber(rs_rF8);
-    Clobber(rs_rF9);
     Clobber(rs_rF10);
-    Clobber(rs_rF11);
     Clobber(rs_rF12);
-    Clobber(rs_rF13);
     Clobber(rs_rF14);
-    Clobber(rs_rF15);
     if (fpuIs32Bit_) {
+      Clobber(rs_rF1);
+      Clobber(rs_rF3);
+      Clobber(rs_rF5);
+      Clobber(rs_rF7);
+      Clobber(rs_rF9);
+      Clobber(rs_rF11);
+      Clobber(rs_rF13);
+      Clobber(rs_rF15);
       Clobber(rs_rD0_fr0);
       Clobber(rs_rD1_fr0);
       Clobber(rs_rD2_fr0);
@@ -717,24 +720,26 @@
                                               fpuIs32Bit_ ? dp_fr0_regs_32 : dp_fr1_regs_32,
                                               reserved_regs_32, empty_pool,  // reserved64
                                               core_temps_32, empty_pool,  // core64_temps
-                                              sp_temps_32,
+                                              fpuIs32Bit_ ? sp_fr0_temps_32 : sp_fr1_temps_32,
                                               fpuIs32Bit_ ? dp_fr0_temps_32 : dp_fr1_temps_32));
 
     // Alias single precision floats to appropriate half of overlapping double.
     for (RegisterInfo* info : reg_pool_->sp_regs_) {
       int sp_reg_num = info->GetReg().GetRegNum();
       int dp_reg_num = sp_reg_num & ~1;
-      RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
-      RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
-      // Double precision register's master storage should refer to itself.
-      DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
-      // Redirect single precision's master storage to master.
-      info->SetMaster(dp_reg_info);
-      // Singles should show a single 32-bit mask bit, at first referring to the low half.
-      DCHECK_EQ(info->StorageMask(), 0x1U);
-      if (sp_reg_num & 1) {
-        // For odd singles, change to user the high word of the backing double.
-        info->SetStorageMask(0x2);
+      if (fpuIs32Bit_ || (sp_reg_num == dp_reg_num)) {
+        RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
+        RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
+        // Double precision register's master storage should refer to itself.
+        DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
+        // Redirect single precision's master storage to master.
+        info->SetMaster(dp_reg_info);
+        // Singles should show a single 32-bit mask bit, at first referring to the low half.
+        DCHECK_EQ(info->StorageMask(), 0x1U);
+        if (sp_reg_num & 1) {
+          // For odd singles, change to use the high word of the backing double.
+          info->SetStorageMask(0x2);
+        }
       }
     }
   }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 4ce3129..a03ff75 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -972,8 +972,10 @@
   size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)];
   native_object_relocations_.emplace(&image_method_array_,
                                      NativeObjectRelocation { offset, image_method_type });
-  CHECK_EQ(sizeof(image_method_array_), 8u);
-  offset += sizeof(image_method_array_);
+  const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize(
+      0, ArtMethod::ObjectSize(target_ptr_size_));
+  CHECK_ALIGNED(array_size, 8u);
+  offset += array_size;
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
     CHECK(m->IsRuntimeMethod());
@@ -1203,7 +1205,7 @@
     if (elem != nullptr) {
       auto it = native_object_relocations_.find(elem);
       if (it == native_object_relocations_.end()) {
-        if (true) {
+        if (it->second.IsArtMethodRelocation()) {
           auto* method = reinterpret_cast<ArtMethod*>(elem);
           LOG(FATAL) << "No relocation entry for ArtMethod " << PrettyMethod(method) << " @ "
               << method << " idx=" << i << "/" << num_elements << " with declaring class "
@@ -1300,8 +1302,8 @@
     return nullptr;
   }
   auto it = native_object_relocations_.find(obj);
-  const NativeObjectRelocation& relocation = it->second;
   CHECK(it != native_object_relocations_.end()) << obj;
+  const NativeObjectRelocation& relocation = it->second;
   return reinterpret_cast<void*>(image_begin_ + relocation.offset);
 }
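The last hunk above moves the CHECK on the lookup result ahead of the read of it->second: dereferencing the end iterator of an unordered map is undefined behavior, so the iterator has to be validated first. A minimal standalone sketch of that ordering, with a hypothetical relocation map rather than ART's native_object_relocations_:

#include <cassert>
#include <cstdint>
#include <unordered_map>

// Hypothetical relocation table keyed by native object pointer.
using RelocationMap = std::unordered_map<const void*, uintptr_t>;

// Mirrors the corrected order: validate the lookup result first, then read the
// mapped value. Reading it->second while it == end() is undefined behavior.
uintptr_t RelocatedOffset(const RelocationMap& relocations, const void* obj) {
  auto it = relocations.find(obj);
  assert(it != relocations.end());  // Plays the role of the CHECK in the hunk.
  return it->second;                // Safe: the iterator is known to be valid.
}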
 
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index eb6aa6f..f4e10cc 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -381,7 +381,8 @@
 
   // Runtime ArtMethods which aren't reachable from any Class but need to be copied into the image.
   ArtMethod* image_methods_[ImageHeader::kImageMethodsCount];
-  // Fake length prefixed array for image methods.
+  // Fake length prefixed array for image methods. This array does not contain the actual
+  // ArtMethods. We only use it for the header and relocation addresses.
   LengthPrefixedArray<ArtMethod> image_method_array_;
 
   // Counters for measurements, used for logging only.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d89d2b2..6c0292c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2789,6 +2789,9 @@
   Location value = locations->InAt(0);
 
   switch (instruction->GetType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
         __ CompareAndBranchIfZero(value.AsRegister<Register>(), slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7fab5cf..b44c5ba 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2012,8 +2012,8 @@
 
   Primitive::Type type = instruction->GetType();
 
-  if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) {
-      LOG(FATAL) << "Unexpected type " << type << "for DivZeroCheck.";
+  if ((type == Primitive::kPrimBoolean) || !Primitive::IsIntegralType(type)) {
+      LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index b6d67de..b6ebeb4 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1908,8 +1908,9 @@
 
   Primitive::Type type = instruction->GetType();
 
-  if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) {
+  if ((type == Primitive::kPrimBoolean) || !Primitive::IsIntegralType(type)) {
       LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+    return;
   }
 
   if (value.IsConstant()) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 5ffab33..4efdbb9 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2995,6 +2995,9 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   switch (instruction->GetType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::Any());
       break;
@@ -3022,6 +3025,9 @@
   Location value = locations->InAt(0);
 
   switch (instruction->GetType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
         __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a0f45ed..1585104 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3161,6 +3161,9 @@
   Location value = locations->InAt(0);
 
   switch (instruction->GetType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
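Taken together, the DivZeroCheck hunks above widen the set of accepted input types: arm, x86 and x86_64 gain explicit kPrimByte/kPrimChar/kPrimShort cases, while arm64 and mips64 replace the int/long-only guard with one that rejects booleans and non-integral types. A self-contained sketch of the resulting predicate, using a hypothetical enum in place of Primitive::Type (ART's Primitive::IsIntegralType() treats booleans as integral, hence the explicit exclusion):

#include <cassert>

// Hypothetical stand-ins for Primitive::Type and Primitive::IsIntegralType().
enum class Type { kBoolean, kByte, kChar, kShort, kInt, kLong, kFloat, kDouble, kNot };

bool IsIntegralType(Type t) {
  return t == Type::kBoolean || t == Type::kByte || t == Type::kChar ||
         t == Type::kShort || t == Type::kInt || t == Type::kLong;
}

// A type is unexpected for DivZeroCheck iff it is boolean or not integral, so
// the accepted types are exactly byte, char, short, int and long.
bool IsUnexpectedDivZeroCheckType(Type t) {
  return t == Type::kBoolean || !IsIntegralType(t);
}

int main() {
  assert(!IsUnexpectedDivZeroCheckType(Type::kByte));
  assert(!IsUnexpectedDivZeroCheckType(Type::kLong));
  assert(IsUnexpectedDivZeroCheckType(Type::kBoolean));
  assert(IsUnexpectedDivZeroCheckType(Type::kFloat));
  return 0;
}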
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0106595..4c74679 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -276,12 +276,12 @@
     nullptr,
     caller_compilation_unit_.GetClassLoader(),
     class_linker,
-    *resolved_method->GetDexFile(),
+    callee_dex_file,
     code_item,
     resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
-    resolved_method->GetDexMethodIndex(),
+    method_index,
     resolved_method->GetAccessFlags(),
-    nullptr);
+    compiler_driver_->GetVerifiedMethod(&callee_dex_file, method_index));
 
   bool requires_ctor_barrier = false;
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 814cebb..ca2c998 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -3298,6 +3298,8 @@
     SetRawInputAt(0, value);
   }
 
+  Primitive::Type GetType() const OVERRIDE { return InputAt(0)->GetType(); }
+
   bool CanBeMoved() const OVERRIDE { return true; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 7821da3..0886e32 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4063,10 +4063,10 @@
     // Retire the temporary class and create the correctly sized resolved class.
     StackHandleScope<1> hs(self);
     auto h_new_class = hs.NewHandle(klass->CopyOf(self, class_size, imt, image_pointer_size_));
-    // Set array lengths to 0 since we don't want the GC to visit two different classes with the
-    // same ArtFields with the same If this occurs, it causes bugs in remembered sets since the GC
-    // may not see any references to the from space and clean the card. Though there was references
-    // to the from space that got marked by the first class.
+    // Set arrays to null since we don't want to have multiple classes with the same ArtField or
+    // ArtMethod array pointers. If this occurs, it causes bugs in remembered sets since the GC
+    // may not see any references to the target space and clean the card for a class if another
+    // class had the same array pointer.
     klass->SetDirectMethodsPtrUnchecked(nullptr);
     klass->SetVirtualMethodsPtr(nullptr);
     klass->SetSFieldsPtrUnchecked(nullptr);
@@ -4973,8 +4973,8 @@
         LengthPrefixedArray<ArtMethod>::ComputeSize(old_method_count, method_size) : 0u;
     const size_t new_size = LengthPrefixedArray<ArtMethod>::ComputeSize(new_method_count,
                                                                         method_size);
-    auto* virtuals = new(runtime->GetLinearAlloc()->Realloc(
-        self, old_virtuals, old_size, new_size))LengthPrefixedArray<ArtMethod>(new_method_count);
+    auto* virtuals = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
+        runtime->GetLinearAlloc()->Realloc(self, old_virtuals, old_size, new_size));
     if (UNLIKELY(virtuals == nullptr)) {
       self->AssertPendingOOMException();
       self->EndAssertNoThreadSuspension(old_cause);
@@ -5002,6 +5002,7 @@
       move_table.emplace(mir_method, &*out);
       ++out;
     }
+    virtuals->SetLength(new_method_count);
     UpdateClassVirtualMethods(klass.Get(), virtuals);
     // Done copying methods, they are all roots in the class now, so we can end the no thread
     // suspension assert.
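The hunk above stops placement-constructing the reallocated method array with its final length and instead reinterprets the storage, copies the methods across, and only then calls the new LengthPrefixedArray::SetLength(). A simplified sketch of that copy-then-publish order, using a toy length-prefixed array of ints instead of LengthPrefixedArray<ArtMethod> and plain malloc in place of LinearAlloc::Realloc():

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>

// Toy length-prefixed array: a 64-bit length followed by the elements
// themselves (not pointers to elements), like LengthPrefixedArray<T>.
struct PrefixedInts {
  uint64_t length_;
  int data_[0];  // Flexible array member (GNU extension, as in the real header).

  static size_t ComputeSize(size_t n) { return sizeof(uint64_t) + n * sizeof(int); }
  void SetLength(uint64_t n) { length_ = n; }  // Updates the length only; no reallocation.
};

// Same ordering as the hunk: obtain the new storage, copy and initialize the
// elements, then publish the new length last so the array never advertises
// slots that have not been filled in yet.
PrefixedInts* Grow(const PrefixedInts* old_arr, size_t old_count, size_t new_count) {
  auto* new_arr = static_cast<PrefixedInts*>(malloc(PrefixedInts::ComputeSize(new_count)));
  if (new_arr == nullptr) {
    return nullptr;
  }
  memcpy(new_arr->data_, old_arr->data_, old_count * sizeof(int));  // Copy existing elements.
  for (size_t i = old_count; i != new_count; ++i) {
    new_arr->data_[i] = 0;  // Zero-initialize the newly added tail.
  }
  new_arr->SetLength(new_count);  // Publish the length only once the data is valid.
  return new_arr;
}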
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index ec689f8..8e329d6 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -229,7 +229,7 @@
     CHECK(thread == self);
     Locks::mutator_lock_->AssertExclusiveHeld(self);
     cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_);
-    cc->SwapStacks(self);
+    cc->SwapStacks();
     if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
       cc->RecordLiveStackFreezeSize(self);
       cc->from_space_num_objects_at_first_pause_ = cc->region_space_->GetObjectsAllocated();
@@ -275,8 +275,8 @@
   }
 }
 
-void ConcurrentCopying::SwapStacks(Thread* self) {
-  heap_->SwapStacks(self);
+void ConcurrentCopying::SwapStacks() {
+  heap_->SwapStacks();
 }
 
 void ConcurrentCopying::RecordLiveStackFreezeSize(Thread* self) {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index a4fd71c..f382448 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -172,7 +172,7 @@
   mirror::Object* GetFwdPtr(mirror::Object* from_ref)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FlipThreadRoots() REQUIRES(!Locks::mutator_lock_);
-  void SwapStacks(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
+  void SwapStacks() SHARED_REQUIRES(Locks::mutator_lock_);
   void RecordLiveStackFreezeSize(Thread* self);
   void ComputeUnevacFromSpaceLiveRatio();
   void LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset offset)
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 4b2c588..94ffe6e 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -191,7 +191,7 @@
     heap_->RevokeAllThreadLocalAllocationStacks(self);
   }
   t.NewTiming("SwapStacks");
-  heap_->SwapStacks(self);
+  heap_->SwapStacks();
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     MarkRoots();
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 7f2c204..e2bcca2 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -190,7 +190,7 @@
   {
     TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap_->SwapStacks(self);
+    heap_->SwapStacks();
     live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
     // Need to revoke all the thread local allocation stacks since we just swapped the allocation
     // stacks and don't want anybody to allocate into the live stack.
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 63def24..c11c134 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -226,7 +226,7 @@
     TimingLogger::ScopedTiming t2("RevokeAllThreadLocalAllocationStacks", GetTimings());
     heap_->RevokeAllThreadLocalAllocationStacks(self_);
   }
-  heap_->SwapStacks(self_);
+  heap_->SwapStacks();
   {
     WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     MarkRoots();
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index cb750eb..b814432 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -39,8 +39,10 @@
 namespace gc {
 
 template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
-inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Class* klass,
-                                                      size_t byte_count, AllocatorType allocator,
+inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self,
+                                                      mirror::Class* klass,
+                                                      size_t byte_count,
+                                                      AllocatorType allocator,
                                                       const PreFenceVisitor& pre_fence_visitor) {
   if (kIsDebugBuild) {
     CheckPreconditionsForAllocObject(klass, byte_count);
@@ -209,7 +211,8 @@
 }
 
 template <bool kInstrumented, typename PreFenceVisitor>
-inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class** klass,
+inline mirror::Object* Heap::AllocLargeObject(Thread* self,
+                                              mirror::Class** klass,
                                               size_t byte_count,
                                               const PreFenceVisitor& pre_fence_visitor) {
   // Save and restore the class in case it moves.
@@ -221,11 +224,14 @@
 }
 
 template <const bool kInstrumented, const bool kGrow>
-inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
-                                           size_t alloc_size, size_t* bytes_allocated,
+inline mirror::Object* Heap::TryToAllocate(Thread* self,
+                                           AllocatorType allocator_type,
+                                           size_t alloc_size,
+                                           size_t* bytes_allocated,
                                            size_t* usable_size,
                                            size_t* bytes_tl_bulk_allocated) {
-  if (allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegionTLAB &&
+  if (allocator_type != kAllocatorTypeTLAB &&
+      allocator_type != kAllocatorTypeRegionTLAB &&
       allocator_type != kAllocatorTypeRosAlloc &&
       UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
@@ -390,7 +396,8 @@
     // Only if the allocation succeeded, record the time.
     if (allocated_obj != nullptr) {
       uint64_t allocation_end_time = NanoTime() / kTimeAdjust;
-      heap_->total_allocation_time_.FetchAndAddSequentiallyConsistent(allocation_end_time - allocation_start_time_);
+      heap_->total_allocation_time_.FetchAndAddSequentiallyConsistent(
+          allocation_end_time - allocation_start_time_);
     }
   }
 }
@@ -423,7 +430,8 @@
   return false;
 }
 
-inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
+inline void Heap::CheckConcurrentGC(Thread* self,
+                                    size_t new_num_bytes_allocated,
                                     mirror::Object** obj) {
   if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
     RequestConcurrentGCAndSaveObject(self, false, obj);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 59e39df..141fed2 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -113,18 +113,34 @@
 // timeout on how long we wait for finalizers to run. b/21544853
 static constexpr uint64_t kNativeAllocationFinalizeTimeout = MsToNs(250u);
 
-Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
-           double target_utilization, double foreground_heap_growth_multiplier,
-           size_t capacity, size_t non_moving_space_capacity, const std::string& image_file_name,
-           const InstructionSet image_instruction_set, CollectorType foreground_collector_type,
+Heap::Heap(size_t initial_size,
+           size_t growth_limit,
+           size_t min_free,
+           size_t max_free,
+           double target_utilization,
+           double foreground_heap_growth_multiplier,
+           size_t capacity,
+           size_t non_moving_space_capacity,
+           const std::string& image_file_name,
+           const InstructionSet image_instruction_set,
+           CollectorType foreground_collector_type,
            CollectorType background_collector_type,
-           space::LargeObjectSpaceType large_object_space_type, size_t large_object_threshold,
-           size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
-           size_t long_pause_log_threshold, size_t long_gc_log_threshold,
-           bool ignore_max_footprint, bool use_tlab,
-           bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
-           bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-           bool verify_post_gc_rosalloc, bool gc_stress_mode,
+           space::LargeObjectSpaceType large_object_space_type,
+           size_t large_object_threshold,
+           size_t parallel_gc_threads,
+           size_t conc_gc_threads,
+           bool low_memory_mode,
+           size_t long_pause_log_threshold,
+           size_t long_gc_log_threshold,
+           bool ignore_max_footprint,
+           bool use_tlab,
+           bool verify_pre_gc_heap,
+           bool verify_pre_sweeping_heap,
+           bool verify_post_gc_heap,
+           bool verify_pre_gc_rosalloc,
+           bool verify_pre_sweeping_rosalloc,
+           bool verify_post_gc_rosalloc,
+           bool gc_stress_mode,
            bool use_homogeneous_space_compaction_for_oom,
            uint64_t min_interval_homogeneous_space_compaction_by_oom)
     : non_moving_space_(nullptr),
@@ -526,8 +542,10 @@
   }
 }
 
-MemMap* Heap::MapAnonymousPreferredAddress(const char* name, uint8_t* request_begin,
-                                           size_t capacity, std::string* out_error_str) {
+MemMap* Heap::MapAnonymousPreferredAddress(const char* name,
+                                           uint8_t* request_begin,
+                                           size_t capacity,
+                                           std::string* out_error_str) {
   while (true) {
     MemMap* map = MemMap::MapAnonymous(name, request_begin, capacity,
                                        PROT_READ | PROT_WRITE, true, false, out_error_str);
@@ -543,9 +561,12 @@
   return foreground_collector_type_ == type || background_collector_type_ == type;
 }
 
-space::MallocSpace* Heap::CreateMallocSpaceFromMemMap(MemMap* mem_map, size_t initial_size,
-                                                      size_t growth_limit, size_t capacity,
-                                                      const char* name, bool can_move_objects) {
+space::MallocSpace* Heap::CreateMallocSpaceFromMemMap(MemMap* mem_map,
+                                                      size_t initial_size,
+                                                      size_t growth_limit,
+                                                      size_t capacity,
+                                                      const char* name,
+                                                      bool can_move_objects) {
   space::MallocSpace* malloc_space = nullptr;
   if (kUseRosAlloc) {
     // Create rosalloc space.
@@ -1494,8 +1515,10 @@
   return nullptr;
 }
 
-mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator,
-                                             size_t alloc_size, size_t* bytes_allocated,
+mirror::Object* Heap::AllocateInternalWithGc(Thread* self,
+                                             AllocatorType allocator,
+                                             size_t alloc_size,
+                                             size_t* bytes_allocated,
                                              size_t* usable_size,
                                              size_t* bytes_tl_bulk_allocated,
                                              mirror::Class** klass) {
@@ -1694,10 +1717,12 @@
 
 class InstanceCounter {
  public:
-  InstanceCounter(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from, uint64_t* counts)
+  InstanceCounter(const std::vector<mirror::Class*>& classes,
+                  bool use_is_assignable_from,
+                  uint64_t* counts)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : classes_(classes), use_is_assignable_from_(use_is_assignable_from), counts_(counts) {
-  }
+      : classes_(classes), use_is_assignable_from_(use_is_assignable_from), counts_(counts) {}
+
   static void Callback(mirror::Object* obj, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     InstanceCounter* instance_counter = reinterpret_cast<InstanceCounter*>(arg);
@@ -1753,7 +1778,8 @@
   DISALLOW_COPY_AND_ASSIGN(InstanceCollector);
 };
 
-void Heap::GetInstances(mirror::Class* c, int32_t max_count,
+void Heap::GetInstances(mirror::Class* c,
+                        int32_t max_count,
                         std::vector<mirror::Object*>& instances) {
   InstanceCollector collector(c, max_count, instances);
   VisitObjects(&InstanceCollector::Callback, &collector);
@@ -1761,7 +1787,8 @@
 
 class ReferringObjectsFinder {
  public:
-  ReferringObjectsFinder(mirror::Object* object, int32_t max_count,
+  ReferringObjectsFinder(mirror::Object* object,
+                         int32_t max_count,
                          std::vector<mirror::Object*>& referring_objects)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : object_(object), max_count_(max_count), referring_objects_(referring_objects) {
@@ -2081,8 +2108,7 @@
 // Special compacting collector which uses sub-optimal bin packing to reduce zygote space size.
 class ZygoteCompactingCollector FINAL : public collector::SemiSpace {
  public:
-  explicit ZygoteCompactingCollector(gc::Heap* heap,
-                                     bool is_running_on_memory_tool)
+  explicit ZygoteCompactingCollector(gc::Heap* heap, bool is_running_on_memory_tool)
       : SemiSpace(heap, false, "zygote collector"),
         bin_live_bitmap_(nullptr),
         bin_mark_bitmap_(nullptr),
@@ -2135,10 +2161,9 @@
     }
   }
 
-  virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const {
+  virtual bool ShouldSweepSpace(space::ContinuousSpace* space ATTRIBUTE_UNUSED) const {
     // Don't sweep any spaces since we probably blasted the internal accounting of the free list
     // allocator.
-    UNUSED(space);
     return false;
   }
 
@@ -2380,7 +2405,8 @@
   }
 }
 
-collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCause gc_cause,
+collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type,
+                                               GcCause gc_cause,
                                                bool clear_soft_references) {
   Thread* self = Thread::Current();
   Runtime* runtime = Runtime::Current();
@@ -2759,8 +2785,7 @@
 class VerifyObjectVisitor {
  public:
   explicit VerifyObjectVisitor(Heap* heap, Atomic<size_t>* fail_count, bool verify_referent)
-      : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {
-  }
+      : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {}
 
   void operator()(mirror::Object* obj)
       SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
@@ -2912,7 +2937,7 @@
           if (!obj->IsObjectArray()) {
             mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
             CHECK(klass != nullptr);
-            for (ArtField& field : is_static ? klass->GetSFields() : klass->GetIFields()) {
+            for (ArtField& field : (is_static ? klass->GetSFields() : klass->GetIFields())) {
               if (field.GetOffset().Int32Value() == offset.Int32Value()) {
                 LOG(ERROR) << (is_static ? "Static " : "") << "field in the live stack is "
                            << PrettyField(&field);
@@ -2980,8 +3005,7 @@
   return !visitor.Failed();
 }
 
-void Heap::SwapStacks(Thread* self) {
-  UNUSED(self);
+void Heap::SwapStacks() {
   if (kUseThreadLocalAllocationStack) {
     live_stack_->AssertAllZero();
   }
@@ -3034,7 +3058,9 @@
   return it->second;
 }
 
-void Heap::ProcessCards(TimingLogger* timings, bool use_rem_sets, bool process_alloc_space_cards,
+void Heap::ProcessCards(TimingLogger* timings,
+                        bool use_rem_sets,
+                        bool process_alloc_space_cards,
                         bool clear_alloc_space_cards) {
   TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   // Clear cards and keep track of cards cleared in the mod-union table.
@@ -3094,11 +3120,11 @@
   if (verify_missing_card_marks_) {
     TimingLogger::ScopedTiming t2("(Paused)PreGcVerifyMissingCardMarks", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    SwapStacks(self);
+    SwapStacks();
     // Sort the live stack so that we can quickly binary search it later.
     CHECK(VerifyMissingCardMarks()) << "Pre " << gc->GetName()
                                     << " missing card mark verification failed\n" << DumpSpaces();
-    SwapStacks(self);
+    SwapStacks();
   }
   if (verify_mod_union_table_) {
     TimingLogger::ScopedTiming t2("(Paused)PreGcVerifyModUnionTables", timings);
@@ -3119,8 +3145,7 @@
   }
 }
 
-void Heap::PrePauseRosAllocVerification(collector::GarbageCollector* gc) {
-  UNUSED(gc);
+void Heap::PrePauseRosAllocVerification(collector::GarbageCollector* gc ATTRIBUTE_UNUSED) {
   // TODO: Add a new runtime option for this?
   if (verify_pre_gc_rosalloc_) {
     RosAllocVerification(current_gc_iteration_.GetTimings(), "PreGcRosAllocVerification");
@@ -3486,7 +3511,8 @@
 
 class Heap::CollectorTransitionTask : public HeapTask {
  public:
-  explicit CollectorTransitionTask(uint64_t target_time) : HeapTask(target_time) { }
+  explicit CollectorTransitionTask(uint64_t target_time) : HeapTask(target_time) {}
+
   virtual void Run(Thread* self) OVERRIDE {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     heap->DoPendingCollectorTransition();
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 09c18b8..6676049 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -96,11 +96,7 @@
 class AgeCardVisitor {
  public:
   uint8_t operator()(uint8_t card) const {
-    if (card == accounting::CardTable::kCardDirty) {
-      return card - 1;
-    } else {
-      return 0;
-    }
+    return (card == accounting::CardTable::kCardDirty) ? card - 1 : 0;
   }
 };
 
@@ -165,20 +161,34 @@
   // Create a heap with the requested sizes. The possible empty
   // image_file_names names specify Spaces to load based on
   // ImageWriter output.
-  explicit Heap(size_t initial_size, size_t growth_limit, size_t min_free,
-                size_t max_free, double target_utilization,
-                double foreground_heap_growth_multiplier, size_t capacity,
+  explicit Heap(size_t initial_size,
+                size_t growth_limit,
+                size_t min_free,
+                size_t max_free,
+                double target_utilization,
+                double foreground_heap_growth_multiplier,
+                size_t capacity,
                 size_t non_moving_space_capacity,
                 const std::string& original_image_file_name,
                 InstructionSet image_instruction_set,
-                CollectorType foreground_collector_type, CollectorType background_collector_type,
-                space::LargeObjectSpaceType large_object_space_type, size_t large_object_threshold,
-                size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
-                size_t long_pause_threshold, size_t long_gc_threshold,
-                bool ignore_max_footprint, bool use_tlab,
-                bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
-                bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-                bool verify_post_gc_rosalloc, bool gc_stress_mode,
+                CollectorType foreground_collector_type,
+                CollectorType background_collector_type,
+                space::LargeObjectSpaceType large_object_space_type,
+                size_t large_object_threshold,
+                size_t parallel_gc_threads,
+                size_t conc_gc_threads,
+                bool low_memory_mode,
+                size_t long_pause_threshold,
+                size_t long_gc_threshold,
+                bool ignore_max_footprint,
+                bool use_tlab,
+                bool verify_pre_gc_heap,
+                bool verify_pre_sweeping_heap,
+                bool verify_post_gc_heap,
+                bool verify_pre_gc_rosalloc,
+                bool verify_pre_sweeping_rosalloc,
+                bool verify_post_gc_rosalloc,
+                bool gc_stress_mode,
                 bool use_homogeneous_space_compaction,
                 uint64_t min_interval_homogeneous_space_compaction_by_oom);
 
@@ -186,7 +196,9 @@
 
   // Allocates and initializes storage for an object instance.
   template <bool kInstrumented, typename PreFenceVisitor>
-  mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes,
+  mirror::Object* AllocObject(Thread* self,
+                              mirror::Class* klass,
+                              size_t num_bytes,
                               const PreFenceVisitor& pre_fence_visitor)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_,
@@ -196,7 +208,9 @@
   }
 
   template <bool kInstrumented, typename PreFenceVisitor>
-  mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass, size_t num_bytes,
+  mirror::Object* AllocNonMovableObject(Thread* self,
+                                        mirror::Class* klass,
+                                        size_t num_bytes,
                                         const PreFenceVisitor& pre_fence_visitor)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_,
@@ -206,9 +220,11 @@
   }
 
   template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
-  ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator(
-      Thread* self, mirror::Class* klass, size_t byte_count, AllocatorType allocator,
-      const PreFenceVisitor& pre_fence_visitor)
+  ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator(Thread* self,
+                                                         mirror::Class* klass,
+                                                         size_t byte_count,
+                                                         AllocatorType allocator,
+                                                         const PreFenceVisitor& pre_fence_visitor)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_,
                !Roles::uninterruptible_);
@@ -263,8 +279,7 @@
   // A weaker test than IsLiveObject or VerifyObject that doesn't require the heap lock,
   // and doesn't abort on error, allowing the caller to report more
   // meaningful diagnostics.
-  bool IsValidObjectAddress(const mirror::Object* obj) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool IsValidObjectAddress(const mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Faster alternative to IsHeapAddress since finding if an object is in the large object space is
   // very slow.
@@ -273,8 +288,10 @@
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
-  bool IsLiveObjectLocked(mirror::Object* obj, bool search_allocation_stack = true,
-                          bool search_live_stack = true, bool sorted = false)
+  bool IsLiveObjectLocked(mirror::Object* obj,
+                          bool search_allocation_stack = true,
+                          bool search_live_stack = true,
+                          bool sorted = false)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Returns true if there is any chance that the object (obj) will move.
@@ -298,7 +315,8 @@
 
   // Implements VMDebug.countInstancesOfClass and JDWP VM_InstanceCount.
   // The boolean decides whether to use IsAssignableFrom or == when comparing classes.
-  void CountInstances(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from,
+  void CountInstances(const std::vector<mirror::Class*>& classes,
+                      bool use_is_assignable_from,
                       uint64_t* counts)
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -307,7 +325,8 @@
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
   // Implements JDWP OR_ReferringObjects.
-  void GetReferringObjects(mirror::Object* o, int32_t max_count,
+  void GetReferringObjects(mirror::Object* o,
+                           int32_t max_count,
                            std::vector<mirror::Object*>& referring_objects)
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -347,8 +366,7 @@
 
   // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
   // waited for.
-  collector::GcType WaitForGcToComplete(GcCause cause, Thread* self)
-      REQUIRES(!*gc_complete_lock_);
+  collector::GcType WaitForGcToComplete(GcCause cause, Thread* self) REQUIRES(!*gc_complete_lock_);
 
   // Update the heap's process state to a new value, may cause compaction to occur.
   void UpdateProcessState(ProcessState process_state)
@@ -405,14 +423,17 @@
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if null is stored in the field.
-  ALWAYS_INLINE void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/,
-                                       const mirror::Object* /*new_value*/) {
+  ALWAYS_INLINE void WriteBarrierField(const mirror::Object* dst,
+                                       MemberOffset offset ATTRIBUTE_UNUSED,
+                                       const mirror::Object* new_value ATTRIBUTE_UNUSED) {
     card_table_->MarkCard(dst);
   }
 
   // Write barrier for array operations that update many field positions
-  ALWAYS_INLINE void WriteBarrierArray(const mirror::Object* dst, int /*start_offset*/,
-                                       size_t /*length TODO: element_count or byte_count?*/) {
+  ALWAYS_INLINE void WriteBarrierArray(const mirror::Object* dst,
+                                       int start_offset ATTRIBUTE_UNUSED,
+                                       // TODO: element_count or byte_count?
+                                       size_t length ATTRIBUTE_UNUSED) {
     card_table_->MarkCard(dst);
   }
 
@@ -436,7 +457,8 @@
   }
 
   // Returns the number of objects currently allocated.
-  size_t GetObjectsAllocated() const REQUIRES(!Locks::heap_bitmap_lock_);
+  size_t GetObjectsAllocated() const
+      REQUIRES(!Locks::heap_bitmap_lock_);
 
   // Returns the total number of objects allocated since the heap was created.
   uint64_t GetObjectsAllocatedEver() const;
@@ -540,11 +562,13 @@
                       accounting::SpaceBitmap<kObjectAlignment>* bitmap2,
                       accounting::SpaceBitmap<kLargeObjectAlignment>* large_objects,
                       accounting::ObjectStack* stack)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(Locks::heap_bitmap_lock_);
 
   // Mark the specified allocation stack as live.
   void MarkAllocStackAsLive(accounting::ObjectStack* stack)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(Locks::heap_bitmap_lock_);
 
   // Unbind any bound bitmaps.
   void UnBindBitmaps() REQUIRES(Locks::heap_bitmap_lock_);
@@ -758,15 +782,20 @@
         allocator_type != kAllocatorTypeTLAB;
   }
   static bool IsMovingGc(CollectorType collector_type) {
-    return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
-        collector_type == kCollectorTypeCC || collector_type == kCollectorTypeMC ||
+    return
+        collector_type == kCollectorTypeSS ||
+        collector_type == kCollectorTypeGSS ||
+        collector_type == kCollectorTypeCC ||
+        collector_type == kCollectorTypeMC ||
         collector_type == kCollectorTypeHomogeneousSpaceCompact;
   }
   bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
+  ALWAYS_INLINE void CheckConcurrentGC(Thread* self,
+                                       size_t new_num_bytes_allocated,
                                        mirror::Object** obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*pending_task_lock_, !*gc_complete_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!*pending_task_lock_, !*gc_complete_lock_);
 
   accounting::ObjectStack* GetMarkStack() {
     return mark_stack_.get();
@@ -774,22 +803,29 @@
 
   // We don't force this to be inlined since it is a slow path.
   template <bool kInstrumented, typename PreFenceVisitor>
-  mirror::Object* AllocLargeObject(Thread* self, mirror::Class** klass, size_t byte_count,
+  mirror::Object* AllocLargeObject(Thread* self,
+                                   mirror::Class** klass,
+                                   size_t byte_count,
                                    const PreFenceVisitor& pre_fence_visitor)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_);
 
   // Handles Allocate()'s slow allocation path with GC involved after
   // an initial allocation attempt failed.
-  mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes,
-                                         size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* AllocateInternalWithGc(Thread* self,
+                                         AllocatorType allocator,
+                                         size_t num_bytes,
+                                         size_t* bytes_allocated,
+                                         size_t* usable_size,
                                          size_t* bytes_tl_bulk_allocated,
                                          mirror::Class** klass)
       REQUIRES(!Locks::thread_suspend_count_lock_, !*gc_complete_lock_, !*pending_task_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Allocate into a specific space.
-  mirror::Object* AllocateInto(Thread* self, space::AllocSpace* space, mirror::Class* c,
+  mirror::Object* AllocateInto(Thread* self,
+                               space::AllocSpace* space,
+                               mirror::Class* c,
                                size_t bytes)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -800,8 +836,10 @@
   // Try to allocate a number of bytes, this function never does any GCs. Needs to be inlined so
   // that the switch statement is constant optimized in the entrypoints.
   template <const bool kInstrumented, const bool kGrow>
-  ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type,
-                                              size_t alloc_size, size_t* bytes_allocated,
+  ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self,
+                                              AllocatorType allocator_type,
+                                              size_t alloc_size,
+                                              size_t* bytes_allocated,
                                               size_t* usable_size,
                                               size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -828,12 +866,14 @@
       REQUIRES(!*pending_task_lock_);
 
   void RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, mirror::Object** obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*pending_task_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!*pending_task_lock_);
   bool IsGCRequestPending() const;
 
   // Sometimes CollectGarbageInternal decides to run a different Gc than you requested. Returns
   // which type of Gc was actually ran.
-  collector::GcType CollectGarbageInternal(collector::GcType gc_plan, GcCause gc_cause,
+  collector::GcType CollectGarbageInternal(collector::GcType gc_plan,
+                                           GcCause gc_cause,
                                            bool clear_soft_references)
       REQUIRES(!*gc_complete_lock_, !Locks::heap_bitmap_lock_, !Locks::thread_suspend_count_lock_,
                !*pending_task_lock_);
@@ -862,13 +902,18 @@
   HomogeneousSpaceCompactResult PerformHomogeneousSpaceCompact() REQUIRES(!*gc_complete_lock_);
 
   // Create the main free list malloc space, either a RosAlloc space or DlMalloc space.
-  void CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t growth_limit,
+  void CreateMainMallocSpace(MemMap* mem_map,
+                             size_t initial_size,
+                             size_t growth_limit,
                              size_t capacity);
 
   // Create a malloc space based on a mem map. Does not set the space as default.
-  space::MallocSpace* CreateMallocSpaceFromMemMap(MemMap* mem_map, size_t initial_size,
-                                                  size_t growth_limit, size_t capacity,
-                                                  const char* name, bool can_move_objects);
+  space::MallocSpace* CreateMallocSpaceFromMemMap(MemMap* mem_map,
+                                                  size_t initial_size,
+                                                  size_t growth_limit,
+                                                  size_t capacity,
+                                                  const char* name,
+                                                  bool can_move_objects);
 
   // Given the current contents of the alloc space, increase the allowed heap footprint to match
   // the target utilization ratio.  This should only be called immediately after a full garbage
@@ -883,21 +928,26 @@
       SHARED_REQUIRES(Locks::heap_bitmap_lock_);
 
   // Swap the allocation stack with the live stack.
-  void SwapStacks(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
+  void SwapStacks() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Clear cards and update the mod union table. When process_alloc_space_cards is true,
   // if clear_alloc_space_cards is true, then we clear cards instead of ageing them. We do
   // not process the alloc space if process_alloc_space_cards is false.
-  void ProcessCards(TimingLogger* timings, bool use_rem_sets, bool process_alloc_space_cards,
+  void ProcessCards(TimingLogger* timings,
+                    bool use_rem_sets,
+                    bool process_alloc_space_cards,
                     bool clear_alloc_space_cards);
 
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object** obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
   void PushOnAllocationStackWithInternalGC(Thread* self, mirror::Object** obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
   void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, mirror::Object** obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
 
   void ClearConcurrentGCRequest();
   void ClearPendingTrim(Thread* self) REQUIRES(!*pending_task_lock_);
@@ -1257,7 +1307,6 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
-  friend class ScopedHeapFill;
   friend class space::SpaceTest;
 
   class AllocationTimer {
diff --git a/runtime/jit/jit_code_cache_test.cc b/runtime/jit/jit_code_cache_test.cc
index 555ad7c..a6cbb71 100644
--- a/runtime/jit/jit_code_cache_test.cc
+++ b/runtime/jit/jit_code_cache_test.cc
@@ -50,7 +50,7 @@
   ASSERT_TRUE(code_cache->ContainsCodePtr(reserved_code));
   ASSERT_EQ(code_cache->NumMethods(), 1u);
   ClassLinker* const cl = Runtime::Current()->GetClassLinker();
-  ArtMethod* method = &cl->AllocArtMethodArray(soa.Self(), 1)->At(0, 0);
+  ArtMethod* method = &cl->AllocArtMethodArray(soa.Self(), 1)->At(0);
   ASSERT_FALSE(code_cache->ContainsMethod(method));
   method->SetEntryPointFromQuickCompiledCode(reserved_code);
   ASSERT_TRUE(code_cache->ContainsMethod(method));
diff --git a/runtime/length_prefixed_array.h b/runtime/length_prefixed_array.h
index 82176e3..2b2e8d3 100644
--- a/runtime/length_prefixed_array.h
+++ b/runtime/length_prefixed_array.h
@@ -48,16 +48,22 @@
     return offsetof(LengthPrefixedArray<T>, data_) + index * element_size;
   }
 
+  // Alignment is the caller's responsibility.
   static size_t ComputeSize(size_t num_elements, size_t element_size = sizeof(T)) {
-    return sizeof(LengthPrefixedArray<T>) + num_elements * element_size;
+    return OffsetOfElement(num_elements, element_size);
   }
 
   uint64_t Length() const {
     return length_;
   }
 
+  // Update the length; this does not reallocate storage.
+  void SetLength(uint64_t length) {
+    length_ = length;
+  }
+
  private:
-  uint64_t length_;  // 64 bits for padding reasons.
+  uint64_t length_;  // 64 bits for 8 byte alignment of data_.
   uint8_t data_[0];
 };
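
The relationship between ComputeSize() and OffsetOfElement() in the hunk above is easy to check on a toy model. The following is a minimal, standalone sketch of the same layout idea (assumptions: this is not the ART class, and data_[1] stands in for the zero-length data_[0] member): a 64-bit length kept so that data_ is 8-byte aligned, followed by elements packed with an explicit element size, so the storage needed for N elements is exactly the offset of element N.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Toy length-prefixed array: a 64-bit length (kept 64-bit so data_ is 8-byte
    // aligned), followed by the elements packed with an explicit element_size.
    template <typename T>
    struct LengthPrefixedSketch {
      static size_t OffsetOfElement(size_t index, size_t element_size = sizeof(T)) {
        return offsetof(LengthPrefixedSketch<T>, data_) + index * element_size;
      }
      // Storage for num_elements elements == offset of the one-past-last element.
      // Alignment of the overall allocation is the caller's responsibility.
      static size_t ComputeSize(size_t num_elements, size_t element_size = sizeof(T)) {
        return OffsetOfElement(num_elements, element_size);
      }
      uint64_t length_;
      uint8_t data_[1];  // Stand-in for the zero-length data_[0] member.
    };

    int main() {
      // With 4-byte elements: element 0 starts at offset 8, three elements need 20 bytes.
      printf("offset(0) = %zu, size(3) = %zu\n",
             LengthPrefixedSketch<int32_t>::OffsetOfElement(0),
             LengthPrefixedSketch<int32_t>::ComputeSize(3));
      return 0;
    }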
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 701ba4a..6af90bb 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -824,6 +824,34 @@
   }
 }
 
+class ReadBarrierOnNativeRootsVisitor {
+ public:
+  void operator()(mirror::Object* obj ATTRIBUTE_UNUSED,
+                  MemberOffset offset ATTRIBUTE_UNUSED,
+                  bool is_static ATTRIBUTE_UNUSED) const {}
+
+  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::Object* old_ref = root->AsMirrorPtr();
+    mirror::Object* new_ref = ReadBarrier::BarrierForRoot(root);
+    if (old_ref != new_ref) {
+      // Update the field atomically. This may fail if a mutator updates it first, but that's OK.
+      auto* atomic_root =
+          reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
+      atomic_root->CompareExchangeStrongSequentiallyConsistent(
+          mirror::CompressedReference<mirror::Object>::FromMirrorPtr(old_ref),
+          mirror::CompressedReference<mirror::Object>::FromMirrorPtr(new_ref));
+    }
+  }
+};
+
 // The pre-fence visitor for Class::CopyOf().
 class CopyClassVisitor {
  public:
@@ -842,6 +870,10 @@
     mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
     h_new_class_obj->PopulateEmbeddedImtAndVTable(imt_, pointer_size_);
     h_new_class_obj->SetClassSize(new_length_);
+    // Visit all of the references to make sure there are no from-space references in the native
+    // roots.
+    h_new_class_obj->VisitReferences<true>(h_new_class_obj->GetClass(),
+                                           ReadBarrierOnNativeRootsVisitor());
   }
 
  private:
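
ReadBarrierOnNativeRootsVisitor above follows a common lock-free fixup pattern: load the root, ask the read barrier for the to-space reference, and publish the new value only via compare-and-exchange so that a concurrent mutator store is never clobbered. A minimal sketch of that pattern, with std::atomic<void*> standing in for ART's Atomic<CompressedReference<...>> and a stubbed ToSpace() standing in for ReadBarrier::BarrierForRoot():

    #include <atomic>

    // Hypothetical stand-in for ReadBarrier::BarrierForRoot(): the real one asks
    // the GC for the to-space copy; this stub just returns its input.
    static void* ToSpace(void* ref) { return ref; }

    void UpdateRootIfStale(std::atomic<void*>* root) {
      void* old_ref = root->load(std::memory_order_relaxed);
      void* new_ref = ToSpace(old_ref);
      if (old_ref != new_ref) {
        // If a mutator stored a different value in the meantime, the CAS fails and
        // we keep the mutator's value (the "may fail, but that's OK" case above).
        root->compare_exchange_strong(old_ref, new_ref, std::memory_order_seq_cst);
      }
    }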
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index f138936..513ab37 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1205,7 +1205,8 @@
   // listed in ifields; fields declared by a superclass are listed in
   // the superclass's Class.ifields.
   //
-  // ArtField arrays are allocated as an array of fields, and not an array of fields pointers.
+  // ArtFields are allocated as a length prefixed ArtField array, and not an array of pointers to
+  // ArtFields.
   uint64_t ifields_;
 
   // Static fields length-prefixed array.
diff --git a/runtime/stride_iterator.h b/runtime/stride_iterator.h
index c69f30e..a9da51b 100644
--- a/runtime/stride_iterator.h
+++ b/runtime/stride_iterator.h
@@ -31,7 +31,7 @@
 
   StrideIterator(T* ptr, size_t stride)
       : ptr_(reinterpret_cast<uintptr_t>(ptr)),
-        stride_(reinterpret_cast<uintptr_t>(stride)) {}
+        stride_(stride) {}
 
   bool operator==(const StrideIterator& other) const {
     DCHECK_EQ(stride_, other.stride_);
@@ -48,17 +48,22 @@
   }
 
   StrideIterator operator++(int) {
-    auto temp = *this;
+    StrideIterator<T> temp = *this;
     ptr_ += stride_;
     return temp;
   }
 
   StrideIterator operator+(ssize_t delta) const {
-    auto temp = *this;
-    temp.ptr_ += static_cast<ssize_t>(stride_) * delta;
+    StrideIterator<T> temp = *this;
+    temp += delta;
     return temp;
   }
 
+  StrideIterator& operator+=(ssize_t delta) {
+    ptr_ += static_cast<ssize_t>(stride_) * delta;
+    return *this;
+  }
+
   T& operator*() const {
     return *reinterpret_cast<T*>(ptr_);
   }
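
With the new operator+=, operator+ is just copy-then-advance, and the stride is stored as a plain size_t rather than being reinterpret_cast from one. A standalone usage sketch follows (it mirrors StrideIterator's semantics but is not the ART class, and uses ptrdiff_t where ART uses ssize_t): walking int32_t headers that repeat every sizeof(Packed) bytes.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Standalone sketch of a stride-based iterator: visit elements that repeat
    // every `stride` bytes, e.g. because each element carries trailing data.
    template <typename T>
    class StrideIter {
     public:
      StrideIter(T* ptr, size_t stride)
          : ptr_(reinterpret_cast<uintptr_t>(ptr)), stride_(stride) {}
      StrideIter& operator+=(ptrdiff_t delta) {
        ptr_ += static_cast<ptrdiff_t>(stride_) * delta;
        return *this;
      }
      // As in the diff above: operator+ is written in terms of operator+=.
      StrideIter operator+(ptrdiff_t delta) const {
        StrideIter tmp = *this;
        tmp += delta;
        return tmp;
      }
      StrideIter& operator++() {
        ptr_ += stride_;
        return *this;
      }
      T& operator*() const { return *reinterpret_cast<T*>(ptr_); }

     private:
      uintptr_t ptr_;
      size_t stride_;
    };

    int main() {
      struct Packed { int32_t header; int32_t payload; };  // Stride is sizeof(Packed).
      Packed storage[3] = {{10, 0}, {20, 0}, {30, 0}};
      StrideIter<int32_t> it(&storage[0].header, sizeof(Packed));
      printf("%d %d %d\n", *it, *(it + 1), *(it + 2));  // Prints: 10 20 30
      return 0;
    }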
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 7fe8bb9..b86a4c8 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -16,6 +16,7 @@
 
 #include "reg_type-inl.h"
 
+#include "base/bit_vector-inl.h"
 #include "base/casts.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -309,13 +310,17 @@
 
 std::string UnresolvedMergedType::Dump() const {
   std::stringstream result;
-  std::set<uint16_t> types = GetMergedTypes();
-  result << "UnresolvedMergedReferences(";
-  auto it = types.begin();
-  result << reg_type_cache_->GetFromId(*it).Dump();
-  for (++it; it != types.end(); ++it) {
-    result << ", ";
-    result << reg_type_cache_->GetFromId(*it).Dump();
+  result << "UnresolvedMergedReferences(" << GetResolvedPart().Dump() << " | ";
+  const BitVector& types = GetUnresolvedTypes();
+
+  bool first = true;
+  for (uint32_t idx : types.Indexes()) {
+    if (!first) {
+      result << ", ";
+    } else {
+      first = false;
+    }
+    result << reg_type_cache_->GetFromId(idx).Dump();
   }
   result << ")";
   return result.str();
@@ -492,32 +497,6 @@
   return true;
 }
 
-std::set<uint16_t> UnresolvedMergedType::GetMergedTypes() const {
-  std::pair<uint16_t, uint16_t> refs = GetTopMergedTypes();
-  const RegType& left = reg_type_cache_->GetFromId(refs.first);
-  const RegType& right = reg_type_cache_->GetFromId(refs.second);
-
-  std::set<uint16_t> types;
-  if (left.IsUnresolvedMergedReference()) {
-    types = down_cast<const UnresolvedMergedType*>(&left)->GetMergedTypes();
-  } else {
-    types.insert(refs.first);
-  }
-  if (right.IsUnresolvedMergedReference()) {
-    std::set<uint16_t> right_types =
-        down_cast<const UnresolvedMergedType*>(&right)->GetMergedTypes();
-    types.insert(right_types.begin(), right_types.end());
-  } else {
-    types.insert(refs.second);
-  }
-  if (kIsDebugBuild) {
-    for (const auto& type : types) {
-      CHECK(!reg_type_cache_->GetFromId(type).IsUnresolvedMergedReference());
-    }
-  }
-  return types;
-}
-
 const RegType& RegType::GetSuperClass(RegTypeCache* cache) const {
   if (!IsUnresolvedTypes()) {
     mirror::Class* super_klass = GetClass()->GetSuperClass();
@@ -803,12 +782,24 @@
   CHECK(klass_.IsNull()) << *this;
 }
 
+UnresolvedMergedType::UnresolvedMergedType(const RegType& resolved,
+                                           const BitVector& unresolved,
+                                           const RegTypeCache* reg_type_cache,
+                                           uint16_t cache_id)
+    : UnresolvedType("", cache_id),
+      reg_type_cache_(reg_type_cache),
+      resolved_part_(resolved),
+      unresolved_types_(unresolved, false, unresolved.GetAllocator()) {
+  if (kIsDebugBuild) {
+    CheckInvariants();
+  }
+}
 void UnresolvedMergedType::CheckInvariants() const {
   // Unresolved merged types: merged types should be defined.
   CHECK(descriptor_.empty()) << *this;
   CHECK(klass_.IsNull()) << *this;
-  CHECK_NE(merged_types_.first, 0U) << *this;
-  CHECK_NE(merged_types_.second, 0U) << *this;
+  CHECK(resolved_part_.IsReferenceTypes());
+  CHECK(!resolved_part_.IsUnresolvedTypes());
 }
 
 void UnresolvedReferenceType::CheckInvariants() const {
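
The rewritten Dump() walks the set bits of the unresolved BitVector and comma-joins their dumps after the resolved part. A standalone sketch of that formatting, with std::bitset standing in for BitVector and a plain name table standing in for RegTypeCache::GetFromId() (the sample strings are illustrative only):

    #include <bitset>
    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <vector>

    // Sketch of the merged-type dump: "<resolved> | <unresolved>, <unresolved>, ...".
    std::string DumpMerged(const std::string& resolved_part,
                           const std::bitset<16>& unresolved,
                           const std::vector<std::string>& names_by_id) {
      std::stringstream result;
      result << "UnresolvedMergedReferences(" << resolved_part << " | ";
      bool first = true;
      for (size_t id = 0; id < unresolved.size(); ++id) {
        if (!unresolved.test(id)) {
          continue;
        }
        if (!first) {
          result << ", ";
        }
        first = false;
        result << names_by_id[id];
      }
      result << ")";
      return result.str();
    }

    int main() {
      std::bitset<16> unresolved;
      unresolved.set(3);
      unresolved.set(5);
      std::vector<std::string> names(16);
      names[3] = "Unresolved Reference: a.b.c.d1";
      names[5] = "Unresolved Reference: a.b.c.d2";
      std::printf("%s\n", DumpMerged("Zero/null", unresolved, names).c_str());
      return 0;
    }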
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 4893088..2834a9a 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -22,6 +22,7 @@
 #include <set>
 #include <string>
 
+#include "base/bit_vector.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
@@ -230,6 +231,14 @@
   // from another.
   const RegType& Merge(const RegType& incoming_type, RegTypeCache* reg_types) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  // Same as above, but also handles the case where incoming_type == this.
+  const RegType& SafeMerge(const RegType& incoming_type, RegTypeCache* reg_types) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (Equals(incoming_type)) {
+      return *this;
+    }
+    return Merge(incoming_type, reg_types);
+  }
 
   /*
    * A basic Join operation on classes. For a pair of types S and T the Join,
@@ -868,30 +877,23 @@
   const RegTypeCache* const reg_type_cache_;
 };
 
-// A merge of two unresolved types. If the types were resolved this may be
-// Conflict or another
-// known ReferenceType.
+// A merge of unresolved (and resolved) types. If the types were resolved this may be
+// Conflict or another known ReferenceType.
 class UnresolvedMergedType FINAL : public UnresolvedType {
  public:
-  UnresolvedMergedType(uint16_t left_id, uint16_t right_id,
+  // Note: the constructor will copy the unresolved BitVector, not use it directly.
+  UnresolvedMergedType(const RegType& resolved, const BitVector& unresolved,
                        const RegTypeCache* reg_type_cache, uint16_t cache_id)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      : UnresolvedType("", cache_id),
-        reg_type_cache_(reg_type_cache),
-        merged_types_(left_id, right_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
-  }
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // The top of a tree of merged types.
-  std::pair<uint16_t, uint16_t> GetTopMergedTypes() const {
-    DCHECK(IsUnresolvedMergedReference());
-    return merged_types_;
+  // The resolved part. See description below.
+  const RegType& GetResolvedPart() const {
+    return resolved_part_;
   }
-
-  // The complete set of merged types.
-  std::set<uint16_t> GetMergedTypes() const;
+  // The unresolved part.
+  const BitVector& GetUnresolvedTypes() const {
+    return unresolved_types_;
+  }
 
   bool IsUnresolvedMergedReference() const OVERRIDE { return true; }
 
@@ -903,7 +905,16 @@
   void CheckInvariants() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   const RegTypeCache* const reg_type_cache_;
-  const std::pair<uint16_t, uint16_t> merged_types_;
+
+  // The original implementation of merged types was a binary tree. Collection of the flattened
+  // types ("leaves") can be expensive, so we store the expanded list now, as two components:
+  // 1) A resolved component. We use Zero when there is no resolved component, as that will be
+  //    an identity merge.
+  // 2) A bitvector of the unresolved reference types. A bitvector was chosen with the assumption
+  //    that there should not be too many types in flight in practice. (We also bias the index
+  //    against the index of Zero, which is one of the later default entries in any cache.)
+  const RegType& resolved_part_;
+  const BitVector unresolved_types_;
 };
 
 std::ostream& operator<<(std::ostream& os, const RegType& rhs)
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 4469e64..e14306c 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -317,39 +317,62 @@
 }
 
 const RegType& RegTypeCache::FromUnresolvedMerge(const RegType& left, const RegType& right) {
-  std::set<uint16_t> types;
+  BitVector types(1,                                    // Allocate at least a word.
+                  true,                                 // Is expandable.
+                  Allocator::GetMallocAllocator());     // TODO: Arenas in the verifier.
+  const RegType* left_resolved;
   if (left.IsUnresolvedMergedReference()) {
-    RegType& non_const(const_cast<RegType&>(left));
-    types = (down_cast<UnresolvedMergedType*>(&non_const))->GetMergedTypes();
+    const UnresolvedMergedType* left_merge = down_cast<const UnresolvedMergedType*>(&left);
+    types.Copy(&left_merge->GetUnresolvedTypes());
+    left_resolved = &left_merge->GetResolvedPart();
+  } else if (left.IsUnresolvedTypes()) {
+    types.SetBit(left.GetId());
+    left_resolved = &Zero();
   } else {
-    types.insert(left.GetId());
+    left_resolved = &left;
   }
+
+  const RegType* right_resolved;
   if (right.IsUnresolvedMergedReference()) {
-    RegType& non_const(const_cast<RegType&>(right));
-    std::set<uint16_t> right_types = (down_cast<UnresolvedMergedType*>(&non_const))->GetMergedTypes();
-    types.insert(right_types.begin(), right_types.end());
+    const UnresolvedMergedType* right_merge = down_cast<const UnresolvedMergedType*>(&right);
+    types.Union(&right_merge->GetUnresolvedTypes());
+    right_resolved = &right_merge->GetResolvedPart();
+  } else if (right.IsUnresolvedTypes()) {
+    types.SetBit(right.GetId());
+    right_resolved = &Zero();
   } else {
-    types.insert(right.GetId());
+    right_resolved = &right;
   }
+
+  // Merge the resolved parts. Left and right might be equal, so use SafeMerge.
+  const RegType& resolved_parts_merged = left_resolved->SafeMerge(*right_resolved, this);
+  // If we get a conflict here, the merge result is a conflict, not an unresolved merge type.
+  if (resolved_parts_merged.IsConflict()) {
+    return Conflict();
+  }
+
   // Check if entry already exists.
   for (size_t i = primitive_count_; i < entries_.size(); i++) {
     const RegType* cur_entry = entries_[i];
     if (cur_entry->IsUnresolvedMergedReference()) {
-      std::set<uint16_t> cur_entry_types =
-          (down_cast<const UnresolvedMergedType*>(cur_entry))->GetMergedTypes();
-      if (cur_entry_types == types) {
+      const UnresolvedMergedType* cmp_type = down_cast<const UnresolvedMergedType*>(cur_entry);
+      const RegType& resolved_part = cmp_type->GetResolvedPart();
+      const BitVector& unresolved_part = cmp_type->GetUnresolvedTypes();
+      // Use SameBitsSet. "types" is expandable to allow merging in the components, but the
+      // BitVector in the final RegType will be made non-expandable.
+      if (&resolved_part == &resolved_parts_merged &&
+              types.SameBitsSet(&unresolved_part)) {
         return *cur_entry;
       }
     }
   }
+
   // Create entry.
-  RegType* entry = new UnresolvedMergedType(left.GetId(), right.GetId(), this, entries_.size());
+  RegType* entry = new UnresolvedMergedType(resolved_parts_merged,
+                                            types,
+                                            this,
+                                            entries_.size());
   AddEntry(entry);
-  if (kIsDebugBuild) {
-    UnresolvedMergedType* tmp_entry = down_cast<UnresolvedMergedType*>(entry);
-    std::set<uint16_t> check_types = tmp_entry->GetMergedTypes();
-    CHECK(check_types == types);
-  }
   return *entry;
 }
 
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 2fecc8b..971b1f5 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -18,6 +18,7 @@
 
 #include <set>
 
+#include "base/bit_vector.h"
 #include "base/casts.h"
 #include "common_runtime_test.h"
 #include "reg_type_cache-inl.h"
@@ -421,7 +422,7 @@
   EXPECT_EQ(expected, resolved_unintialiesd.Dump());
   expected = "Unresolved And Uninitialized Reference: java.lang.DoesNotExist Allocation PC: 12";
   EXPECT_EQ(expected, unresolved_unintialized.Dump());
-  expected = "UnresolvedMergedReferences(Unresolved Reference: java.lang.DoesNotExist, Unresolved Reference: java.lang.DoesNotExistEither)";
+  expected = "UnresolvedMergedReferences(Zero/null | Unresolved Reference: java.lang.DoesNotExist, Unresolved Reference: java.lang.DoesNotExistEither)";
   EXPECT_EQ(expected, unresolved_merged.Dump());
 }
 
@@ -477,9 +478,10 @@
   EXPECT_TRUE(merged.IsUnresolvedMergedReference());
   RegType& merged_nonconst = const_cast<RegType&>(merged);
 
-  std::set<uint16_t> merged_ids = (down_cast<UnresolvedMergedType*>(&merged_nonconst))->GetMergedTypes();
-  EXPECT_EQ(ref_type_0.GetId(), *(merged_ids.begin()));
-  EXPECT_EQ(ref_type_1.GetId(), *((++merged_ids.begin())));
+  const BitVector& unresolved_parts =
+      down_cast<UnresolvedMergedType*>(&merged_nonconst)->GetUnresolvedTypes();
+  EXPECT_TRUE(unresolved_parts.IsBitSet(ref_type_0.GetId()));
+  EXPECT_TRUE(unresolved_parts.IsBitSet(ref_type_1.GetId()));
 }
 
 TEST_F(RegTypeTest, MergingFloat) {
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index 6e7d5b6..1db7cc8 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -32,6 +32,7 @@
 //
 // ThreadStress command line parameters:
 //    -n X ............ number of threads
+//    -d X ............ number of daemon threads
 //    -o X ............ number of overall operations
 //    -t X ............ number of operations per thread
 //    --dumpmap ....... print the frequency map
@@ -301,6 +302,7 @@
 
     public static void parseAndRun(String[] args) throws Exception {
         int numberOfThreads = -1;
+        int numberOfDaemons = -1;
         int totalOperations = -1;
         int operationsPerThread = -1;
         Object lock = new Object();
@@ -312,6 +314,9 @@
                 if (args[i].equals("-n")) {
                     i++;
                     numberOfThreads = Integer.parseInt(args[i]);
+                } else if (args[i].equals("-d")) {
+                    i++;
+                    numberOfDaemons = Integer.parseInt(args[i]);
                 } else if (args[i].equals("-o")) {
                     i++;
                     totalOperations = Integer.parseInt(args[i]);
@@ -338,6 +343,10 @@
             numberOfThreads = 5;
         }
 
+        if (numberOfDaemons == -1) {
+            numberOfDaemons = 3;
+        }
+
         if (totalOperations == -1) {
             totalOperations = 1000;
         }
@@ -355,14 +364,16 @@
             System.out.println(frequencyMap);
         }
 
-        runTest(numberOfThreads, operationsPerThread, lock, frequencyMap);
+        runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
     }
 
-    public static void runTest(final int numberOfThreads, final int operationsPerThread,
-                               final Object lock, Map<Operation, Double> frequencyMap)
-                                   throws Exception {
-        // Each thread is going to do operationsPerThread
-        // operations. The distribution of operations is determined by
+    public static void runTest(final int numberOfThreads, final int numberOfDaemons,
+                               final int operationsPerThread, final Object lock,
+                               Map<Operation, Double> frequencyMap) throws Exception {
+        // Each normal thread is going to do operationsPerThread
+        // operations. Each daemon thread will loop over all
+        // the operations and will not stop.
+        // The distribution of operations is determined by
         // the Operation.frequency values. We fill out an Operation[]
         // for each thread with the operations it is to perform. The
         // Operation[] is shuffled so that there is more random
@@ -371,7 +382,9 @@
         // Fill in the Operation[] array for each thread by laying
         // down references to operation according to their desired
         // frequency.
-        final Main[] threadStresses = new Main[numberOfThreads];
+        // The first numberOfThreads elements are normal threads, the last
+        // numberOfDaemons elements are daemon threads.
+        final Main[] threadStresses = new Main[numberOfThreads + numberOfDaemons];
         for (int t = 0; t < threadStresses.length; t++) {
             Operation[] operations = new Operation[operationsPerThread];
             int o = 0;
@@ -388,9 +401,10 @@
                     }
                 }
             }
-            // Randomize the oepration order
+            // Randomize the operation order
             Collections.shuffle(Arrays.asList(operations));
-            threadStresses[t] = new Main(lock, t, operations);
+            threadStresses[t] = t < numberOfThreads ? new Main(lock, t, operations) :
+                                                      new Daemon(lock, t, operations);
         }
 
         // Enable to dump operation counts per thread to make sure its
@@ -459,6 +473,14 @@
             notifier.start();
         }
 
+        // Create and start the daemon threads.
+        for (int r = 0; r < numberOfDaemons; r++) {
+            Main daemon = threadStresses[numberOfThreads + r];
+            Thread t = new Thread(daemon, "Daemon thread " + daemon.id);
+            t.setDaemon(true);
+            t.start();
+        }
+
         for (int r = 0; r < runners.length; r++) {
             runners[r].start();
         }
@@ -467,9 +489,9 @@
         }
     }
 
-    private final Operation[] operations;
+    protected final Operation[] operations;
     private final Object lock;
-    private final int id;
+    protected final int id;
 
     private int nextOperation;
 
@@ -503,4 +525,33 @@
         }
     }
 
+    private static class Daemon extends Main {
+        private Daemon(Object lock, int id, Operation[] operations) {
+            super(lock, id, operations);
+        }
+
+        public void run() {
+            try {
+                if (DEBUG) {
+                    System.out.println("Starting ThreadStress Daemon " + id);
+                }
+                int i = 0;
+                while (true) {
+                    Operation operation = operations[i];
+                    if (DEBUG) {
+                        System.out.println("ThreadStress Daemon " + id
+                                           + " operation " + i
+                                           + " is " + operation);
+                    }
+                    operation.perform();
+                    i = (i + 1) % operations.length;
+                }
+            } finally {
+                if (DEBUG) {
+                    System.out.println("Finishing ThreadStress Daemon for " + id);
+                }
+            }
+        }
+    }
+
 }
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 728ccea..884f280 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -37,4 +37,5 @@
 b/22411633 (4)
 b/22411633 (5)
 b/22777307
+b/22881413
 Done!
diff --git a/test/800-smali/smali/b_22881413.smali b/test/800-smali/smali/b_22881413.smali
new file mode 100644
index 0000000..29dd82a
--- /dev/null
+++ b/test/800-smali/smali/b_22881413.smali
@@ -0,0 +1,295 @@
+.class public LB22881413;
+.super Ljava/lang/Object;
+
+# A couple of fields to allow "loading" resolved and unresolved types. Use non-final classes to
+# avoid automatically getting precise reference types.
+.field private static res1:Ljava/lang/Number;
+.field private static res2:Ljava/lang/ClassLoader;
+.field private static res3:Ljava/lang/Package;
+.field private static res4:Ljava/lang/RuntimeException;
+.field private static res5:Ljava/lang/Exception;
+.field private static res6:Ljava/util/ArrayList;
+.field private static res7:Ljava/util/LinkedList;
+.field private static res8:Ljava/lang/Thread;
+.field private static res9:Ljava/lang/ThreadGroup;
+.field private static res10:Ljava/lang/Runtime;
+
+.field private static unres1:La/b/c/d1;
+.field private static unres2:La/b/c/d2;
+.field private static unres3:La/b/c/d3;
+.field private static unres4:La/b/c/d4;
+.field private static unres5:La/b/c/d5;
+.field private static unres6:La/b/c/d6;
+.field private static unres7:La/b/c/d7;
+.field private static unres8:La/b/c/d8;
+.field private static unres9:La/b/c/d9;
+.field private static unres10:La/b/c/d10;
+
+.field private static unresBase0:La/b/c/dBase0;
+.field private static unresBase1:La/b/c/dBase1;
+.field private static unresBase2:La/b/c/dBase2;
+.field private static unresBase3:La/b/c/dBase3;
+.field private static unresBase4:La/b/c/dBase4;
+.field private static unresBase5:La/b/c/dBase5;
+.field private static unresBase6:La/b/c/dBase6;
+.field private static unresBase7:La/b/c/dBase7;
+.field private static unresBase8:La/b/c/dBase8;
+
+# Empty, ignore this. We want to see if the other method can be verified in a reasonable amount of
+# time.
+.method public static run()V
+.registers 2
+       return-void
+.end method
+
+.method public static foo(IZZ)V
+.registers 11
+       # v8 = int, v9 = boolean, v10 = boolean
+
+       sget-object v0, LB22881413;->unresBase0:La/b/c/dBase0;
+
+# Test an UnresolvedUninitializedReference type.
+       new-instance v0, La/b/c/dBaseInit;
+
+       const v1, 0
+       const v2, 0
+
+# We're trying to create something like this (with more loops to amplify things).
+#
+# v0 = Unresolved1
+# while (something) {
+#
+#   [Repeatedly]
+#   if (cond) {
+#     v0 = ResolvedX;
+#   } else {
+#     v0 = UnresolvedX;
+#   }
+#
+#   v0 = Unresolved2
+# };
+#
+# Important points:
+#   1) Use a while, so that the end of the loop is a goto. That way, the merging of outer-loop
+#      unresolved classes is postponed.
+#   2) Put the else cases after all if cases. That way there are backward gotos that will lead
+#      to stabilization loops in the body.
+#
+
+:Loop1
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop1End
+
+:Loop2
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop2End
+
+:Loop3
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop3End
+
+:Loop4
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop4End
+
+:Loop5
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop5End
+
+:Loop6
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop6End
+
+:Loop7
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop7End
+
+:Loop8
+
+       const v6, 0
+       add-int/lit16 v8, v8, -1
+       if-ge v8, v6, :Loop8End
+
+# Prototype:
+#
+#       if-eqz v9, :ElseX
+#       sget-object v0, LB22881413;->res1:Ljava/lang/Number;
+#:JoinX
+#
+# And somewhere at the end
+#
+#:ElseX
+#       sget-object v0, LB22881413;->unresX:La/b/c/dX;
+#       goto :JoinX
+#
+#
+
+       if-eqz v10, :Join1
+       if-eqz v9, :Else1
+       sget-object v0, LB22881413;->res1:Ljava/lang/Number;
+:Join1
+
+
+       if-eqz v10, :Join2
+       if-eqz v9, :Else2
+       sget-object v0, LB22881413;->res2:Ljava/lang/ClassLoader;
+:Join2
+
+
+       if-eqz v10, :Join3
+       if-eqz v9, :Else3
+       sget-object v0, LB22881413;->res3:Ljava/lang/Package;
+:Join3
+
+
+       if-eqz v10, :Join4
+       if-eqz v9, :Else4
+       sget-object v0, LB22881413;->res4:Ljava/lang/RuntimeException;
+:Join4
+
+
+       if-eqz v10, :Join5
+       if-eqz v9, :Else5
+       sget-object v0, LB22881413;->res5:Ljava/lang/Exception;
+:Join5
+
+
+       if-eqz v10, :Join6
+       if-eqz v9, :Else6
+       sget-object v0, LB22881413;->res6:Ljava/util/ArrayList;
+:Join6
+
+
+       if-eqz v10, :Join7
+       if-eqz v9, :Else7
+       sget-object v0, LB22881413;->res7:Ljava/util/LinkedList;
+:Join7
+
+
+       if-eqz v10, :Join8
+       if-eqz v9, :Else8
+       sget-object v0, LB22881413;->res8:Ljava/lang/Thread;
+:Join8
+
+
+       if-eqz v10, :Join9
+       if-eqz v9, :Else9
+       sget-object v0, LB22881413;->res9:Ljava/lang/ThreadGroup;
+:Join9
+
+
+       if-eqz v10, :Join10
+       if-eqz v9, :Else10
+       sget-object v0, LB22881413;->res10:Ljava/lang/Runtime;
+:Join10
+
+
+       goto :InnerMostLoopEnd
+
+:Else1
+       sget-object v0, LB22881413;->unres1:La/b/c/d1;
+       goto :Join1
+
+:Else2
+       sget-object v0, LB22881413;->unres2:La/b/c/d2;
+       goto :Join2
+
+:Else3
+       sget-object v0, LB22881413;->unres3:La/b/c/d3;
+       goto :Join3
+
+:Else4
+       sget-object v0, LB22881413;->unres4:La/b/c/d4;
+       goto :Join4
+
+:Else5
+       sget-object v0, LB22881413;->unres5:La/b/c/d5;
+       goto :Join5
+
+:Else6
+       sget-object v0, LB22881413;->unres6:La/b/c/d6;
+       goto :Join6
+
+:Else7
+       sget-object v0, LB22881413;->unres7:La/b/c/d7;
+       goto :Join7
+
+:Else8
+       sget-object v0, LB22881413;->unres8:La/b/c/d8;
+       goto :Join8
+
+:Else9
+       sget-object v0, LB22881413;->unres9:La/b/c/d9;
+       goto :Join9
+
+:Else10
+       sget-object v0, LB22881413;->unres10:La/b/c/d10;
+       goto :Join10
+
+:InnerMostLoopEnd
+
+       # Loop 8 end of body.
+       sget-object v0, LB22881413;->unresBase8:La/b/c/dBase8;
+       goto :Loop8
+
+:Loop8End
+
+       # Loop 7 end of body.
+       sget-object v0, LB22881413;->unresBase7:La/b/c/dBase7;
+       goto :Loop7
+
+:Loop7End
+
+       # Loop 6 end of body.
+       sget-object v0, LB22881413;->unresBase6:La/b/c/dBase6;
+       goto :Loop6
+
+:Loop6End
+
+       # Loop 5 end of body
+       sget-object v0, LB22881413;->unresBase5:La/b/c/dBase5;
+       goto :Loop5
+
+:Loop5End
+
+       # Loop 4 end of body
+       sget-object v0, LB22881413;->unresBase4:La/b/c/dBase4;
+       goto :Loop4
+
+:Loop4End
+
+       # Loop 3 end of body
+       sget-object v0, LB22881413;->unresBase3:La/b/c/dBase3;
+       goto :Loop3
+
+:Loop3End
+
+       # Loop 2 end of body
+       sget-object v0, LB22881413;->unresBase2:La/b/c/dBase2;
+       goto :Loop2
+
+:Loop2End
+
+       # Loop 1 end of body
+       sget-object v0, LB22881413;->unresBase1:La/b/c/dBase1;
+       goto :Loop1
+
+:Loop1End
+
+       return-void
+
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 438e214..e1ac749 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -121,6 +121,7 @@
                 null, null));
         testCases.add(new TestCase("b/22777307", "B22777307", "run", null, new InstantiationError(),
                 null));
+        testCases.add(new TestCase("b/22881413", "B22881413", "run", null, null, null));
     }
 
     public void runTests() {
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 750a29f..a1af577 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -338,6 +338,17 @@
   if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
     dex2oat_cmdline="${dex2oat_cmdline} --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
   fi
+
+  # Add in a timeout. This is important for testing the compilation/verification time of
+  # pathological cases.
+  # Note: since we don't know how capable targets (e.g., the emulator) are, only do this on the
+  #       host for now. We should try to improve this.
+  #       The current value is rather arbitrary. run-tests should compile quickly.
+  if [ "$HOST" != "n" ]; then
+    # Use SIGRTMIN+2 to try to dump threads.
+    # Use -k 1m to SIGKILL it a minute later if it hasn't ended.
+    dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 1m ${dex2oat_cmdline}"
+  fi
 fi
 
 DALVIKVM_ISA_FEATURES_ARGS=""