dex2oat: Pack likely-dirty objects together when generating the boot image

This introduces a new algorithm into the image writer that "bins"
objects by how likely they are to be dirtied at runtime. Objects in the
same bin are placed contiguously in memory (i.e. into the same pages).
The bin selection is tuned based on how clean or how dirty each object
is likely to be at runtime.

As-is, this saves about 150KB of private-dirty pages per process and
about 700KB of shared-dirty pages in the zygote.

There is still about 800KB of objects that are clean but located in
dirty pages, so with more analysis we can tune the bin selection and get
even more memory savings.
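
For reference, an object's final image offset is computed from its bin
slot roughly as in the stand-alone sketch below (a simplified
illustration of AssignImageOffset()/GetBinSizeSum(); the bin list is
trimmed and the free function and parameter names are hypothetical, not
part of this change):

    #include <cstddef>
    #include <cstdint>
    #include <numeric>

    // Trimmed-down bin list; the real enum has more bins.
    enum Bin { kBinString, kBinRegular, kBinClassVerified, kBinSize };

    // Map a (bin, index-within-bin) slot to a unique image offset, given
    // the per-bin byte totals gathered during the first heap walk and the
    // offset of the first object (right after the image header).
    uint32_t SlotToOffset(const size_t bin_sizes[kBinSize],
                          size_t objects_begin,
                          Bin bin, uint32_t index_in_bin) {
      // All bytes of bins [0, bin) are laid out before this object's bin.
      size_t before = std::accumulate(&bin_sizes[0], &bin_sizes[bin],
                                      static_cast<size_t>(0));
      return static_cast<uint32_t>(objects_begin + before + index_in_bin);
    }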

Bug: 17611661
Change-Id: Ia1455e4c56ffd0a36ae2a723d35b7e06502980f7
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 7905157..193be15 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -55,7 +55,8 @@
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
-#include "utils.h"
+
+#include <numeric>
 
 using ::art::mirror::ArtField;
 using ::art::mirror::ArtMethod;
@@ -68,6 +69,9 @@
 
 namespace art {
 
+// Separate objects into multiple bins to optimize dirty memory use.
+static constexpr bool kBinObjects = true;
+
 bool ImageWriter::Write(const std::string& image_filename,
                         uintptr_t image_begin,
                         const std::string& oat_filename,
@@ -191,54 +195,43 @@
   return true;
 }
 
-void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) {
+void ImageWriter::SetImageOffset(mirror::Object* object,
+                                 ImageWriter::BinSlot bin_slot,
+                                 size_t offset) {
   DCHECK(object != nullptr);
   DCHECK_NE(offset, 0U);
-  DCHECK(!IsImageOffsetAssigned(object));
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset);
   DCHECK_ALIGNED(obj, kObjectAlignment);
-  image_bitmap_->Set(obj);
-  // Before we stomp over the lock word, save the hash code for later.
-  Monitor::Deflate(Thread::Current(), object);;
-  LockWord lw(object->GetLockWord(false));
-  switch (lw.GetState()) {
-    case LockWord::kFatLocked: {
-      LOG(FATAL) << "Fat locked object " << obj << " found during object copy";
-      break;
+
+  image_bitmap_->Set(obj);  // Mark the obj as mutated, since we will end up changing it.
+  {
+    // Remember the in-image object's hash code so we can restore it after the copy.
+    auto hash_it = saved_hashes_map_.find(bin_slot);
+    if (hash_it != saved_hashes_map_.end()) {
+      std::pair<BinSlot, uint32_t> slot_hash = *hash_it;
+      saved_hashes_.push_back(std::make_pair(obj, slot_hash.second));
+      saved_hashes_map_.erase(hash_it);
     }
-    case LockWord::kThinLocked: {
-      LOG(FATAL) << "Thin locked object " << obj << " found during object copy";
-      break;
-    }
-    case LockWord::kUnlocked:
-      // No hash, don't need to save it.
-      break;
-    case LockWord::kHashCode:
-      saved_hashes_.push_back(std::make_pair(obj, lw.GetHashCode()));
-      break;
-    default:
-      LOG(FATAL) << "Unreachable.";
-      break;
   }
+  // The object is already deflated from when we set the bin slot. Just overwrite the lock word.
   object->SetLockWord(LockWord::FromForwardingAddress(offset), false);
   DCHECK(IsImageOffsetAssigned(object));
 }
 
-void ImageWriter::AssignImageOffset(mirror::Object* object) {
+void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot bin_slot) {
   DCHECK(object != nullptr);
-  SetImageOffset(object, image_end_);
-  size_t object_size;
-  if (object->IsArtMethod()) {
-    // Methods are sized based on the target pointer size.
-    object_size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
-  } else {
-    object_size = object->SizeOf();
-  }
-  image_end_ += RoundUp(object_size, 8);  // 64-bit alignment
-  DCHECK_LT(image_end_, image_->Size());
+  DCHECK_NE(image_objects_offset_begin_, 0u);
+
+  size_t previous_bin_sizes = GetBinSizeSum(bin_slot.GetBin());  // sum sizes in [0..bin#)
+  size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex();
+  DCHECK_ALIGNED(new_offset, kObjectAlignment);
+
+  SetImageOffset(object, bin_slot, new_offset);
+  DCHECK_LT(new_offset, image_end_);
 }
 
 bool ImageWriter::IsImageOffsetAssigned(mirror::Object* object) const {
+  // Will also return true if the bin slot was assigned since we are reusing the lock word.
   DCHECK(object != nullptr);
   return object->GetLockWord(false).GetState() == LockWord::kForwardingAddress;
 }
@@ -252,6 +245,178 @@
   return offset;
 }
 
+void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) {
+  DCHECK(object != nullptr);
+  DCHECK(!IsImageOffsetAssigned(object));
+  DCHECK(!IsImageBinSlotAssigned(object));
+
+  // Before we stomp over the lock word, save the hash code for later.
+  Monitor::Deflate(Thread::Current(), object);
+  LockWord lw(object->GetLockWord(false));
+  switch (lw.GetState()) {
+    case LockWord::kFatLocked: {
+      LOG(FATAL) << "Fat locked object " << object << " found during object copy";
+      break;
+    }
+    case LockWord::kThinLocked: {
+      LOG(FATAL) << "Thin locked object " << object << " found during object copy";
+      break;
+    }
+    case LockWord::kUnlocked:
+      // No hash, don't need to save it.
+      break;
+    case LockWord::kHashCode:
+      saved_hashes_map_[bin_slot] = lw.GetHashCode();
+      break;
+    default:
+      LOG(FATAL) << "Unreachable.";
+      break;
+  }
+  object->SetLockWord(LockWord::FromForwardingAddress(static_cast<uint32_t>(bin_slot)),
+                      false);
+  DCHECK(IsImageBinSlotAssigned(object));
+}
+
+void ImageWriter::AssignImageBinSlot(mirror::Object* object) {
+  DCHECK(object != nullptr);
+  size_t object_size;
+  if (object->IsArtMethod()) {
+    // Methods are sized based on the target pointer size.
+    object_size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
+  } else {
+    object_size = object->SizeOf();
+  }
+
+  // The magic happens here. We segregate objects into different bins based
+  // on how likely they are to get dirty at runtime.
+  //
+  // Likely-to-dirty objects get packed together into the same bin so that
+  // at runtime their page dirtiness ratio (how many dirty objects a page has) is
+  // maximized.
+  //
+  // This means more pages will stay either clean or shared dirty (with zygote) and
+  // the app will use less of its own (private) memory.
+  Bin bin = kBinRegular;
+
+  if (kBinObjects) {
+    //
+    // Changing the bin of an object is purely a memory-use tuning;
+    // it has no effect on runtime correctness.
+    //
+    // Memory analysis has determined that the following types of objects get dirtied
+    // the most:
+    //
+    // * Classes that are verified but not initialized [their clinit runs at runtime]
+    //   - classes in general [because their static fields get overwritten]
+    //   - initialized classes with all-final statics are unlikely to ever become dirty,
+    //     so bin them separately
+    // * ArtMethods that:
+    //   - are native [their native entry point is not looked up until runtime]
+    //   - have declaring classes that aren't initialized
+    //            [their interpreter/quick entry points are trampolines until the class
+    //             becomes initialized]
+    //
+    // We also assume the following objects get dirtied either never or extremely rarely:
+    //  * Strings (they are immutable)
+    //  * ArtMethods that aren't native and have initialized declaring classes
+    //
+    // We assume that "regular" bin objects are highly unlikely to become dirtied,
+    // so further segregating them would not noticeably improve the dirty-to-clean ratio.
+    //
+    if (object->IsClass()) {
+      bin = kBinClassVerified;
+      mirror::Class* klass = object->AsClass();
+
+      if (klass->GetStatus() == Class::kStatusInitialized) {
+        bin = kBinClassInitialized;
+
+        // If the class's static fields are all final, put it into a separate bin
+        // since it's very likely it will stay clean.
+        uint32_t num_static_fields = klass->NumStaticFields();
+        if (num_static_fields == 0) {
+          bin = kBinClassInitializedFinalStatics;
+        } else {
+          // Maybe all the statics are final?
+          bool all_final = true;
+          for (uint32_t i = 0; i < num_static_fields; ++i) {
+            ArtField* field = klass->GetStaticField(i);
+            if (!field->IsFinal()) {
+              all_final = false;
+              break;
+            }
+          }
+
+          if (all_final) {
+            bin = kBinClassInitializedFinalStatics;
+          }
+        }
+      }
+    } else if (object->IsArtMethod<kVerifyNone>()) {
+      mirror::ArtMethod* art_method = down_cast<ArtMethod*>(object);
+      if (art_method->IsNative()) {
+        bin = kBinArtMethodNative;
+      } else {
+        mirror::Class* declaring_class = art_method->GetDeclaringClass();
+        if (declaring_class->GetStatus() != Class::kStatusInitialized) {
+          bin = kBinArtMethodNotInitialized;
+        } else {
+          // This is highly unlikely to dirty since there are no entry points to mutate.
+          bin = kBinArtMethodsManagedInitialized;
+        }
+      }
+    } else if (object->GetClass<kVerifyNone>()->IsStringClass()) {
+      bin = kBinString;  // Strings are almost always immutable (except for object header).
+    }  // else bin = kBinRegular
+  }
+
+  size_t current_offset = bin_slot_sizes_[bin];  // The object's offset within its bin (aligned).
+  // Grow the current bin size to accommodate the object we just assigned a bin slot.
+  size_t offset_delta = RoundUp(object_size, kObjectAlignment);  // 64-bit alignment
+  bin_slot_sizes_[bin] += offset_delta;
+
+  BinSlot new_bin_slot(bin, current_offset);
+  SetImageBinSlot(object, new_bin_slot);
+
+  ++bin_slot_count_[bin];
+
+  DCHECK_LT(GetBinSizeSum(), image_->Size());
+
+  // Advance the end of the image by the (aligned) size of the object we just assigned.
+  image_end_ += offset_delta;
+  DCHECK_LT(image_end_, image_->Size());
+}
+
+bool ImageWriter::IsImageBinSlotAssigned(mirror::Object* object) const {
+  DCHECK(object != nullptr);
+
+  // We always stash the bin slot into a lockword, in the 'forwarding address' state.
+  // If it's in some other state, then we haven't yet assigned an image bin slot.
+  if (object->GetLockWord(false).GetState() != LockWord::kForwardingAddress) {
+    return false;
+  } else if (kIsDebugBuild) {
+    LockWord lock_word = object->GetLockWord(false);
+    size_t offset = lock_word.ForwardingAddress();
+    BinSlot bin_slot(offset);
+    DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()])
+      << "bin slot offset should not exceed the size of that bin";
+  }
+  return true;
+}
+
+ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const {
+  DCHECK(object != nullptr);
+  DCHECK(IsImageBinSlotAssigned(object));
+
+  LockWord lock_word = object->GetLockWord(false);
+  size_t offset = lock_word.ForwardingAddress();  // TODO: ForwardingAddress should be uint32_t
+  DCHECK_LE(offset, std::numeric_limits<uint32_t>::max());
+
+  BinSlot bin_slot(static_cast<uint32_t>(offset));
+  DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()]);
+
+  return bin_slot;
+}
+
 bool ImageWriter::AllocMemory() {
   size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
   std::string error_msg;
@@ -560,29 +725,29 @@
   }
 }
 
-void ImageWriter::CalculateObjectOffsets(Object* obj) {
+void ImageWriter::CalculateObjectBinSlots(Object* obj) {
   DCHECK(obj != NULL);
   // if it is a string, we want to intern it if its not interned.
   if (obj->GetClass()->IsStringClass()) {
     // we must be an interned string that was forward referenced and already assigned
-    if (IsImageOffsetAssigned(obj)) {
+    if (IsImageBinSlotAssigned(obj)) {
       DCHECK_EQ(obj, obj->AsString()->Intern());
       return;
     }
     mirror::String* const interned = obj->AsString()->Intern();
     if (obj != interned) {
-      if (!IsImageOffsetAssigned(interned)) {
+      if (!IsImageBinSlotAssigned(interned)) {
         // interned obj is after us, allocate its location early
-        AssignImageOffset(interned);
+        AssignImageBinSlot(interned);
       }
       // point those looking for this object to the interned version.
-      SetImageOffset(obj, GetImageOffset(interned));
+      SetImageBinSlot(obj, GetImageBinSlot(interned));
       return;
     }
     // else (obj == interned), nothing to do but fall through to the normal case
   }
 
-  AssignImageOffset(obj);
+  AssignImageBinSlot(obj);
 }
 
 ObjectArray<Object>* ImageWriter::CreateImageRoots() const {
@@ -663,13 +828,15 @@
 
 // For an unvisited object, visit it then all its children found via fields.
 void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
-  if (!IsImageOffsetAssigned(obj)) {
+  // Use our own visitor routine (instead of the GC visitor) to get better locality between
+  // an object and its fields.
+  if (!IsImageBinSlotAssigned(obj)) {
     // Walk instance fields of all objects
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::Object> h_obj(hs.NewHandle(obj));
     Handle<mirror::Class> klass(hs.NewHandle(obj->GetClass()));
     // visit the object itself.
-    CalculateObjectOffsets(h_obj.Get());
+    CalculateObjectBinSlots(h_obj.Get());
     WalkInstanceFields(h_obj.Get(), klass.Get());
     // Walk static fields of a Class.
     if (h_obj->IsClass()) {
@@ -703,6 +870,24 @@
   writer->WalkFieldsInOrder(obj);
 }
 
+void ImageWriter::UnbinObjectsIntoOffsetCallback(mirror::Object* obj, void* arg) {
+  ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
+  DCHECK(writer != nullptr);
+  writer->UnbinObjectsIntoOffset(obj);
+}
+
+void ImageWriter::UnbinObjectsIntoOffset(mirror::Object* obj) {
+  CHECK(obj != nullptr);
+
+  // We know the bin slot and the total bin sizes for all objects by now,
+  // so calculate the object's final image offset.
+
+  DCHECK(IsImageBinSlotAssigned(obj));
+  BinSlot bin_slot = GetImageBinSlot(obj);
+  // Change the lock word from a bin slot into an offset.
+  AssignImageOffset(obj, bin_slot);
+}
+
 void ImageWriter::CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_data_offset) {
   CHECK_NE(0U, oat_loaded_size);
   Thread* self = Thread::Current();
@@ -714,18 +899,32 @@
 
   // Leave space for the header, but do not write it yet, we need to
   // know where image_roots is going to end up
-  image_end_ += RoundUp(sizeof(ImageHeader), 8);  // 64-bit-alignment
+  image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit alignment
 
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     // TODO: Image spaces only?
     const char* old = self->StartAssertNoThreadSuspension("ImageWriter");
     DCHECK_LT(image_end_, image_->Size());
-    // Clear any pre-existing monitors which may have been in the monitor words.
+    image_objects_offset_begin_ = image_end_;
+    // Clear any pre-existing monitors that may be in the monitor words, and assign bin slots.
     heap->VisitObjects(WalkFieldsCallback, this);
+    // Transform each object's bin slot into an offset which will be used to do the final copy.
+    heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
+    DCHECK(saved_hashes_map_.empty());  // All bin slot hashes should be in the vector by now.
     self->EndAssertNoThreadSuspension(old);
   }
 
+  DCHECK_GT(image_end_, GetBinSizeSum());
+
+  if (kIsDebugBuild) {
+    LOG(INFO) << "Bin summary (total size: " << GetBinSizeSum() << "): ";
+    for (size_t bin = 0; bin < kBinSize; ++bin) {
+      LOG(INFO) << "  bin# " << bin << ", number objects: " << bin_slot_count_[bin] << ", "
+                << " total byte size: " << bin_slot_sizes_[bin];
+    }
+  }
+
   const byte* oat_file_begin = image_begin_ + RoundUp(image_end_, kPageSize);
   const byte* oat_file_end = oat_file_begin + oat_loaded_size;
   oat_data_begin_ = oat_file_begin + oat_data_offset;
@@ -794,6 +993,7 @@
   image_writer->FixupObject(obj, copy);
 }
 
+// Rewrite all the references in the copied object to point to their image address equivalent
 class FixupVisitor {
  public:
   FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) {
@@ -829,8 +1029,9 @@
   void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     DCHECK(obj->IsClass());
-    FixupVisitor::operator()(obj, offset, false);
+    FixupVisitor::operator()(obj, offset, /*is_static*/false);
 
+    // TODO: Remove dead code
     if (offset.Uint32Value() < mirror::Class::EmbeddedVTableOffset().Uint32Value()) {
       return;
     }
@@ -1059,4 +1260,32 @@
   image_header->SetOatChecksum(oat_header->GetChecksum());
 }
 
+size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const {
+  DCHECK_LE(up_to, kBinSize);
+  return std::accumulate(&bin_slot_sizes_[0], &bin_slot_sizes_[up_to], /*init*/0);
+}
+
+ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) {
+  // These values may need to get updated if more bins are added to the enum Bin
+  static_assert(kBinBits == 3, "wrong number of bin bits");
+  static_assert(kBinShift == 29, "wrong number of shift bits");
+  static_assert(sizeof(BinSlot) == sizeof(LockWord), "BinSlot/LockWord must have equal sizes");
+
+  DCHECK_LT(GetBin(), kBinSize);
+  DCHECK_ALIGNED(GetIndex(), kObjectAlignment);
+}
+
+ImageWriter::BinSlot::BinSlot(Bin bin, uint32_t index)
+    : BinSlot(index | (static_cast<uint32_t>(bin) << kBinShift)) {
+  DCHECK_EQ(index, GetIndex());
+}
+
+ImageWriter::Bin ImageWriter::BinSlot::GetBin() const {
+  return static_cast<Bin>((lockword_ & kBinMask) >> kBinShift);
+}
+
+uint32_t ImageWriter::BinSlot::GetIndex() const {
+  return lockword_ & ~kBinMask;
+}
+
 }  // namespace art
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 83a4d2b..efcfc5e 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -32,6 +32,8 @@
 #include "mirror/dex_cache.h"
 #include "os.h"
 #include "safe_map.h"
+#include "gc/space/space.h"
+#include "utils.h"
 
 namespace art {
 
@@ -39,12 +41,13 @@
 class ImageWriter FINAL {
  public:
   explicit ImageWriter(const CompilerDriver& compiler_driver)
-      : compiler_driver_(compiler_driver), oat_file_(NULL), image_end_(0), image_begin_(NULL),
+      : compiler_driver_(compiler_driver), oat_file_(NULL), image_end_(0),
+        image_objects_offset_begin_(0), image_begin_(NULL),
         oat_data_begin_(NULL), interpreter_to_interpreter_bridge_offset_(0),
         interpreter_to_compiled_code_bridge_offset_(0), portable_imt_conflict_trampoline_offset_(0),
         portable_resolution_trampoline_offset_(0), quick_generic_jni_trampoline_offset_(0),
         quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0),
-        compile_pic_(false) {}
+        compile_pic_(false), target_ptr_size_(0), bin_slot_sizes_(), bin_slot_count_() {}
 
   ~ImageWriter() {}
 
@@ -65,14 +68,69 @@
   // Mark the objects defined in this space in the given live bitmap.
   void RecordImageAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Classify different kinds of bins that objects end up getting packed into during image writing.
+  enum Bin {
+    // Likely-clean:
+    kBinString,                        // [String] Almost always immutable (except for obj header).
+    kBinArtMethodsManagedInitialized,  // [ArtMethod] Non-native and initialized. Unlikely to dirty
+    // Unknown mix of clean/dirty:
+    kBinRegular,
+    // Likely-dirty:
+    // All classes get their own bins since their static fields often get dirtied
+    kBinClassInitializedFinalStatics,  // Class initializers have been run, no non-final statics
+    kBinClassInitialized,         // Class initializers have been run
+    kBinClassVerified,            // Class verified, but initializers haven't been run
+    kBinArtMethodNative,          // Art method that is actually native
+    kBinArtMethodNotInitialized,  // Art method with a declaring class that wasn't initialized
+    // Don't care about other ArtMethods since they don't get dirtied
+    // Add more bins here if we add more segregation code.
+    kBinSize,
+  };
+
+  static constexpr size_t kBinBits = MinimumBitsToStore(kBinSize - 1);
+  // uint32 = typeof(lockword_)
+  static constexpr size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits;
+  // 111000.....0
+  static constexpr size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;
+
+  // We use the lock word to store the bin # and bin index of the object in the image.
+  //
+  // The struct size must be exactly sizeof(LockWord), currently 32 bits, since this will end up
+  // stored in the lock word bit-for-bit when object forwarding addresses are being calculated.
+  struct BinSlot {
+    explicit BinSlot(uint32_t lockword);
+    BinSlot(Bin bin, uint32_t index);
+
+    // The bin an object belongs to, i.e. regular, class/verified, class/initialized, etc.
+    Bin GetBin() const;
+    // The offset in bytes from the beginning of the bin. Aligned to kObjectAlignment.
+    uint32_t GetIndex() const;
+    // Pack into a single uint32_t, for storing into a lock word.
+    explicit operator uint32_t() const { return lockword_; }
+    // Comparison operator for map support
+    bool operator<(const BinSlot& other) const { return lockword_ < other.lockword_; }
+
+  private:
+    // Must be the same size as LockWord, any larger and we would truncate the data.
+    const uint32_t lockword_;
+  };
+
   // We use the lock word to store the offset of the object in the image.
-  void AssignImageOffset(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetImageOffset(mirror::Object* object, size_t offset)
+  void AssignImageOffset(mirror::Object* object, BinSlot bin_slot)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetImageOffset(mirror::Object* object, BinSlot bin_slot, size_t offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsImageOffsetAssigned(mirror::Object* object) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   size_t GetImageOffset(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void AssignImageBinSlot(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetImageBinSlot(mirror::Object* object, BinSlot bin_slot)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsImageBinSlotAssigned(mirror::Object* object) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  BinSlot GetImageBinSlot(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void* GetImageAddressCallback(void* writer, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return reinterpret_cast<ImageWriter*>(writer)->GetImageAddress(obj);
@@ -141,7 +199,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* CreateImageRoots() const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void CalculateObjectOffsets(mirror::Object* obj)
+  void CalculateObjectBinSlots(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void UnbinObjectsIntoOffset(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void WalkInstanceFields(mirror::Object* obj, mirror::Class* klass)
@@ -150,6 +210,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void WalkFieldsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void UnbinObjectsIntoOffsetCallback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
   void CopyAndFixupObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -171,6 +233,9 @@
   void PatchOatCodeAndMethods(File* elf_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Calculate the sum total of the bin slot sizes in [0, up_to). Defaults to all bins.
+  size_t GetBinSizeSum(Bin up_to = kBinSize) const;
+
   const CompilerDriver& compiler_driver_;
 
   // oat file with code for this image
@@ -182,12 +247,18 @@
   // Offset to the free space in image_.
   size_t image_end_;
 
+  // Offset from image_begin_ to where the first object is in image_.
+  size_t image_objects_offset_begin_;
+
   // Beginning target image address for the output image.
   byte* image_begin_;
 
   // Saved hashes (objects are inside of the image so that they don't move).
   std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_;
 
+  // Saved hashes, keyed by bin slot (these objects have not yet been assigned an image address).
+  std::map<BinSlot, uint32_t> saved_hashes_map_;
+
   // Beginning target oat address for the pointers from the output image to its oat file.
   const byte* oat_data_begin_;
 
@@ -210,6 +281,10 @@
   // Size of pointers on the target architecture.
   size_t target_ptr_size_;
 
+  // Bin slot tracking for dirty object packing
+  size_t bin_slot_sizes_[kBinSize];  // Number of bytes in a bin
+  size_t bin_slot_count_[kBinSize];  // Number of objects in a bin
+
   friend class FixupVisitor;
   friend class FixupClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
diff --git a/runtime/utils.h b/runtime/utils.h
index 5bdbba8..a0f5bd4 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -167,6 +167,18 @@
   typedef T type;
 };
 
+// Like sizeof, but count how many bits a type takes. Pass type explicitly.
+template <typename T>
+static constexpr size_t BitSizeOf() {
+  return sizeof(T) * CHAR_BIT;
+}
+
+// Like sizeof, but count how many bits a type takes. Infers type from parameter.
+template <typename T>
+static constexpr size_t BitSizeOf(T x) {
+  return sizeof(T) * CHAR_BIT;
+}
+
 // For rounding integers.
 template<typename T>
 static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) WARN_UNUSED;
@@ -203,6 +215,22 @@
   return reinterpret_cast<T*>(RoundUp(reinterpret_cast<uintptr_t>(x), n));
 }
 
+namespace utils {
+namespace detail {  // Private, implementation-specific namespace. Do not poke outside of this file.
+template <typename T>
+static constexpr inline T RoundUpToPowerOfTwoRecursive(T x, size_t bit) {
+  return bit == (BitSizeOf<T>()) ? x : RoundUpToPowerOfTwoRecursive(x | x >> bit, bit << 1);
+}
+}  // namespace detail
+}  // namespace utils
+
+// Recursive implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
+// figure 3-3, page 48, where the function is called clp2.
+template <typename T>
+static constexpr inline T RoundUpToPowerOfTwo(T x) {
+  return art::utils::detail::RoundUpToPowerOfTwoRecursive(x - 1, 1) + 1;
+}
+
 // Implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
 // figure 3-3, page 48, where the function is called clp2.
 static inline uint32_t RoundUpToPowerOfTwo(uint32_t x) {
@@ -215,10 +243,23 @@
   return x + 1;
 }
 
+// Find the bit position of the most significant bit (0-based), or -1 if no bits are set.
+template <typename T>
+static constexpr ssize_t MostSignificantBit(T value) {
+  return (value == 0) ? -1 : (MostSignificantBit(value >> 1) + 1);
+}
+
+// How many bits (minimally) does it take to store the constant 'value'? e.g. 1 for 1, 3 for 5, etc.
+template <typename T>
+static constexpr size_t MinimumBitsToStore(T value) {
+  return static_cast<size_t>(MostSignificantBit(value) + 1);
+}
+
 template<typename T>
 static constexpr int CLZ(T x) {
+  static_assert(sizeof(T) <= sizeof(long long), "T must not be larger than long long");  // NOLINT [runtime/int] [4]
   return (sizeof(T) == sizeof(uint32_t))
-      ? __builtin_clz(x)
+      ? __builtin_clz(x)  // TODO: __builtin_clz[ll] has undefined behavior for x=0
       : __builtin_clzll(x);
 }
 
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 1b2c3ee..bc2a004 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -402,4 +402,36 @@
   }
 }
 
+TEST_F(UtilsTest, RoundUpToPowerOfTwo) {
+  // Tests the constexpr variant since all the parameters are constexpr
+  EXPECT_EQ(0, RoundUpToPowerOfTwo(0));
+  EXPECT_EQ(1, RoundUpToPowerOfTwo(1));
+  EXPECT_EQ(2, RoundUpToPowerOfTwo(2));
+  EXPECT_EQ(4, RoundUpToPowerOfTwo(3));
+  EXPECT_EQ(8, RoundUpToPowerOfTwo(7));
+
+  EXPECT_EQ(0b10000L, RoundUpToPowerOfTwo(0b01101L));
+  EXPECT_EQ(1ULL << 63, RoundUpToPowerOfTwo(1ULL << 62 | 1ULL));
+}
+
+TEST_F(UtilsTest, MostSignificantBit) {
+  EXPECT_EQ(-1, MostSignificantBit(0));
+  EXPECT_EQ(0, MostSignificantBit(1));
+  EXPECT_EQ(31, MostSignificantBit(~static_cast<uint32_t>(0)));
+  EXPECT_EQ(2, MostSignificantBit(0b110));
+  EXPECT_EQ(2, MostSignificantBit(0b100));
+}
+
+TEST_F(UtilsTest, MinimumBitsToStore) {
+  EXPECT_EQ(0u, MinimumBitsToStore(0));
+  EXPECT_EQ(1u, MinimumBitsToStore(1));
+  EXPECT_EQ(2u, MinimumBitsToStore(0b10));
+  EXPECT_EQ(2u, MinimumBitsToStore(0b11));
+  EXPECT_EQ(3u, MinimumBitsToStore(0b100));
+  EXPECT_EQ(3u, MinimumBitsToStore(0b110));
+  EXPECT_EQ(3u, MinimumBitsToStore(0b101));
+  EXPECT_EQ(8u, MinimumBitsToStore(0xFF));
+  EXPECT_EQ(32u, MinimumBitsToStore(~static_cast<uint32_t>(0)));
+}
+
 }  // namespace art