Combine image string char arrays into single array

Having one giant char array shared between all the strings saves memory
since it avoids the 12 bytes of array overhead per image string. Also
added substring finding based on prefixes, strings are added into the
array in reverse sorted length.

Image size goes from 11767808 -> 11100160.

Bug: 17643507

Change-Id: I2a7f177b40d0458d5c50640643d8f16b0030bdce
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 871889f..1f94c8a 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -121,7 +121,7 @@
     Thread::Current()->TransitionFromSuspendedToRunnable();
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
-    ComputeEagerResolvedStrings();
+    ProcessStrings();
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -262,6 +262,152 @@
   return true;
 }
 
+// Count the number of strings in the heap and put the result in arg as a size_t pointer.
+static void CountStringsCallback(Object* obj, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (obj->GetClass()->IsStringClass()) {
+    ++*reinterpret_cast<size_t*>(arg);
+  }
+}
+
+// Collect all the java.lang.String in the heap and put them in the output strings_ array.
+class StringCollector {
+ public:
+  StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index)
+      : strings_(strings), index_(index) {
+  }
+  static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    auto* collector = reinterpret_cast<StringCollector*>(arg);
+    if (obj->GetClass()->IsStringClass()) {
+      collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString());
+    }
+  }
+  size_t GetIndex() const {
+    return index_;
+  }
+
+ private:
+  Handle<mirror::ObjectArray<mirror::String>> strings_;
+  size_t index_;
+};
+
+// Compare strings based on length, used for sorting strings by length / reverse length.
+class StringLengthComparator {
+ public:
+  explicit StringLengthComparator(Handle<mirror::ObjectArray<mirror::String>> strings)
+      : strings_(strings) {
+  }
+  bool operator()(size_t a, size_t b) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return strings_->GetWithoutChecks(a)->GetLength() < strings_->GetWithoutChecks(b)->GetLength();
+  }
+
+ private:
+  Handle<mirror::ObjectArray<mirror::String>> strings_;
+};
+
+// If string a is a prefix of b or b is a prefix of a then they are considered equal. This
+// enables us to find prefixes instead of exact matches. Otherwise we do a normal string
+// comparison. The strings compared of the form <position, length> inside of the chars_ array.
+class SubstringComparator {
+ public:
+  explicit SubstringComparator(const std::vector<uint16_t>* const chars) : chars_(chars) {
+  }
+  bool operator()(const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b) {
+    size_t compare_length = std::min(a.second, b.second);
+    const uint16_t* ptr_a = &chars_->at(a.first);
+    const uint16_t* ptr_b = &chars_->at(b.first);
+    for (size_t i = 0; i < compare_length; ++i) {
+      if (ptr_a[i] != ptr_b[i]) {
+        return ptr_a[i] < ptr_b[i];
+      }
+    }
+    return false;
+  }
+
+ private:
+  const std::vector<uint16_t>* const chars_;
+};
+
+void ImageWriter::ProcessStrings() {
+  size_t total_strings = 0;
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  ClassLinker* cl = Runtime::Current()->GetClassLinker();
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    heap->VisitObjects(CountStringsCallback, &total_strings);  // Count the strings.
+  }
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  auto strings = hs.NewHandle(cl->AllocStringArray(self, total_strings));
+  StringCollector string_collector(strings, 0U);
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    // Read strings into the array.
+    heap->VisitObjects(StringCollector::Callback, &string_collector);
+  }
+  // Some strings could have gotten freed if AllocStringArray caused a GC.
+  CHECK_LE(string_collector.GetIndex(), total_strings);
+  total_strings = string_collector.GetIndex();
+  size_t total_length = 0;
+  std::vector<size_t> reverse_sorted_strings;
+  for (size_t i = 0; i < total_strings; ++i) {
+    mirror::String* s = strings->GetWithoutChecks(i);
+    // Look up the string in the array.
+    total_length += s->GetLength();
+    reverse_sorted_strings.push_back(i);
+  }
+  // Sort by reverse length.
+  StringLengthComparator comparator(strings);
+  std::sort(reverse_sorted_strings.rbegin(), reverse_sorted_strings.rend(), comparator);
+  // Deduplicate prefixes and add strings to the char array.
+  std::vector<uint16_t> combined_chars(total_length, 0U);
+  size_t num_chars = 0;
+  // Characters of strings which are non equal prefix of another string (not the same string).
+  // We don't count the savings from equal strings since these would get interned later anyways.
+  size_t prefix_saved_chars = 0;
+  std::set<std::pair<size_t, size_t>, SubstringComparator> existing_strings((
+      SubstringComparator(&combined_chars)));
+  for (size_t i = 0; i < total_strings; ++i) {
+    mirror::String* s = strings->GetWithoutChecks(reverse_sorted_strings[i]);
+    // Add the string to the end of the char array.
+    size_t length = s->GetLength();
+    for (size_t j = 0; j < length; ++j) {
+      combined_chars[num_chars++] = s->CharAt(j);
+    }
+    // Try to see if the string exists as a prefix of an existing string.
+    size_t new_offset = 0;
+    std::pair<size_t, size_t> new_string(num_chars - length, length);
+    auto it = existing_strings.find(new_string);
+    if (it != existing_strings.end()) {
+      for (size_t j = 0; j < length; ++j) {
+        DCHECK_EQ(combined_chars[it->first + j], s->CharAt(j));
+      }
+      // Shares a prefix, set the offset to where the new offset will be.
+      new_offset = it->first;
+      // Remove the added chars.
+      num_chars -= length;
+      if (it->second != length) {
+        prefix_saved_chars += length;
+      }
+    } else {
+      new_offset = new_string.first;
+      existing_strings.insert(new_string);
+    }
+    s->SetOffset(new_offset);
+  }
+  // Allocate and update the char arrays.
+  auto* array = mirror::CharArray::Alloc(self, num_chars);
+  for (size_t i = 0; i < num_chars; ++i) {
+    array->SetWithoutChecks<false>(i, combined_chars[i]);
+  }
+  for (size_t i = 0; i < total_strings; ++i) {
+    strings->GetWithoutChecks(i)->SetArray(array);
+  }
+  VLOG(compiler) << "Total # image strings=" << total_strings << " combined length="
+      << total_length << " prefix saved chars=" << prefix_saved_chars;
+  ComputeEagerResolvedStrings();
+}
+
 void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg) {
   if (!obj->GetClass()->IsStringClass()) {
     return;
@@ -290,7 +436,7 @@
   }
 }
 
-void ImageWriter::ComputeEagerResolvedStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::ComputeEagerResolvedStrings() {
   ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
@@ -361,8 +507,7 @@
   return true;
 }
 
-void ImageWriter::CheckNonImageClassesRemoved()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::CheckNonImageClassesRemoved() {
   if (compiler_driver_.GetImageClasses() != nullptr) {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -583,8 +728,7 @@
   // Note that image_end_ is left at end of used space
 }
 
-void ImageWriter::CopyAndFixupObjects()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::CopyAndFixupObjects() {
   Thread* self = Thread::Current();
   const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
   gc::Heap* heap = Runtime::Current()->GetHeap();
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 61365fe..4b4190d 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -26,12 +26,12 @@
 
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
+#include "gc/space/space.h"
 #include "mem_map.h"
 #include "oat_file.h"
 #include "mirror/dex_cache.h"
 #include "os.h"
 #include "safe_map.h"
-#include "gc/space/space.h"
 
 namespace art {
 
@@ -123,13 +123,16 @@
   static void ComputeEagerResolvedStringsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Combine string char arrays.
+  void ProcessStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static bool NonImageClassesVisitor(mirror::Class* c, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Verify unwanted classes removed.
-  void CheckNonImageClassesRemoved();
+  void CheckNonImageClassesRemoved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -149,7 +152,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
-  void CopyAndFixupObjects();
+  void CopyAndFixupObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FixupMethod(mirror::ArtMethod* orig, mirror::ArtMethod* copy)
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 1320ab7..631c19b 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -111,6 +111,14 @@
 
   int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Offset is only used during testing so use non-transactional mode.
+    DCHECK_LE(0, new_offset);
+    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
+  }
+
+  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static Class* GetJavaLangString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!java_lang_String_.IsNull());
     return java_lang_String_.Read();
@@ -136,21 +144,12 @@
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
   }
 
-  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Offset is only used during testing so use non-transactional mode.
-    DCHECK_LE(0, new_offset);
-    DCHECK_GE(GetLength(), new_offset);
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
-  }
-
   static String* Alloc(Thread* self, int32_t utf16_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static String* Alloc(Thread* self, Handle<CharArray> array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<CharArray> array_;
 
@@ -163,6 +162,7 @@
   static GcRoot<Class> java_lang_String_;
 
   friend struct art::StringOffsets;  // for verifying offset information
+
   FRIEND_TEST(ObjectTest, StringLength);  // for SetOffset and SetCount
   DISALLOW_IMPLICIT_CONSTRUCTORS(String);
 };