Layout string data

Inspect dex code in class initializers and hot methods to find out
what const strings are likely to be resolved. Along with these,
look at static field accesses and group the field name and type with
other hot strings.

Group method shorties together.

Maps vdex PSS: 7478k -> 6900k
No change in odex PSS.

Changed dexlayout_test to run dexlayout with -v instead of diffing the
output against a base64-encoded expected dex file.

Bug: 36457259
Bug: 35800981

Test: test-art-host

(cherry picked from commit fa0aa0953875427d7381ac44a6427bd5b1d5020a)

Change-Id: Iaa5de649c5b2b1d9f178525d2f42fe00e3fb879f
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 105610e..0536f322 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1528,6 +1528,111 @@
   return new_class_data_order;
 }
 
+void DexLayout::LayoutStringData(const DexFile* dex_file) {  // Groups string data by hotness.
+  const size_t num_strings = header_->GetCollections().StringIds().size();
+  std::vector<bool> is_shorty(num_strings, false);  // Indexed by string id; hot shorties.
+  std::vector<bool> from_hot_method(num_strings, false);  // Indexed by string id; hot strings.
+  for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
+    // A name of a profile class is probably going to get looked up by ClassTable::Lookup, mark it
+    // as hot.
+    const bool is_profile_class =
+        info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
+    if (is_profile_class) {
+      from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
+    }
+    dex_ir::ClassData* data = class_def->GetClassData();
+    if (data == nullptr) {  // No class data, so there are no methods to scan.
+      continue;
+    }
+    for (size_t i = 0; i < 2; ++i) {  // 0: direct methods, 1: virtual methods.
+      for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
+        const dex_ir::MethodId* method_id = method->GetMethodId();
+        dex_ir::CodeItem* code_item = method->GetCodeItem();
+        if (code_item == nullptr) {  // No code item, nothing to inspect.
+          continue;
+        }
+        const bool is_clinit = is_profile_class &&
+            (method->GetAccessFlags() & kAccConstructor) != 0 &&
+            (method->GetAccessFlags() & kAccStatic) != 0;  // Static constructor of a profile class.
+        const bool method_executed = is_clinit ||
+            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
+        if (!method_executed) {  // Only class initializers and profiled methods contribute.
+          continue;
+        }
+        is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
+        dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
+        if (fixups == nullptr) {
+          continue;
+        }
+        if (fixups->StringIds() != nullptr) {
+          // Add const-strings: every string id recorded in the code fixups is marked hot.
+          for (dex_ir::StringId* id : *fixups->StringIds()) {
+            from_hot_method[id->GetIndex()] = true;
+          }
+        }
+        // TODO: Only visit field ids from static getters and setters.
+        for (dex_ir::FieldId* id : *fixups->FieldIds()) {  // NOTE(review): not null checked.
+          // Add the field names and types from getters and setters (name and type strings).
+          from_hot_method[id->Name()->GetIndex()] = true;
+          from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
+        }
+      }
+    }
+  }
+  // Gather every string id and the data offset range, then sort string data by specified order.
+  std::vector<dex_ir::StringId*> string_ids;
+  size_t min_offset = std::numeric_limits<size_t>::max();
+  size_t max_offset = 0;
+  size_t hot_bytes = 0;  // Only used for the VLOG statistic below.
+  for (auto& string_id : header_->GetCollections().StringIds()) {
+    string_ids.push_back(string_id.get());
+    const size_t cur_offset = string_id->DataItem()->GetOffset();
+    CHECK_NE(cur_offset, 0u);  // A zero data offset is not expected here.
+    min_offset = std::min(min_offset, cur_offset);
+    dex_ir::StringData* data = string_id->DataItem();
+    const size_t element_size = data->GetSize() + 1;  // Add one extra for null.
+    size_t end_offset = cur_offset + element_size;
+    if (is_shorty[string_id->GetIndex()] || from_hot_method[string_id->GetIndex()]) {
+      hot_bytes += element_size;
+    }
+    max_offset = std::max(max_offset, end_offset);
+  }
+  VLOG(compiler) << "Hot string data bytes " << hot_bytes << "/" << max_offset - min_offset;
+  std::sort(string_ids.begin(),
+            string_ids.end(),
+            [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
+                                           const dex_ir::StringId* b) {
+    const bool a_is_hot = from_hot_method[a->GetIndex()];
+    const bool b_is_hot = from_hot_method[b->GetIndex()];
+    if (a_is_hot != b_is_hot) {
+      return a_is_hot < b_is_hot;  // false < true: non-hot strings sort first.
+    }
+    // After hot methods are partitioned, subpartition shorties.
+    const bool a_is_shorty = is_shorty[a->GetIndex()];
+    const bool b_is_shorty = is_shorty[b->GetIndex()];
+    if (a_is_shorty != b_is_shorty) {
+      return a_is_shorty < b_is_shorty;  // Non-shorties sort first within each partition.
+    }
+    // Preserve order: ties are broken by the original data offset.
+    return a->DataItem()->GetOffset() < b->DataItem()->GetOffset();
+  });
+  // Now we know what order we want the string data, reorder the offsets.
+  size_t offset = min_offset;  // Repack starting at the lowest original data offset.
+  for (dex_ir::StringId* string_id : string_ids) {
+    dex_ir::StringData* data = string_id->DataItem();
+    data->SetOffset(offset);
+    offset += data->GetSize() + 1;  // Add one extra for null.
+  }
+  if (offset > max_offset) {
+    const uint32_t diff = offset - max_offset;
+    // If we expanded the string data section, we need to update the offsets or else we will
+    // corrupt the next section when writing out.
+    FixupSections(header_->GetCollections().StringDatasOffset(), diff);
+    // Update file size.
+    header_->SetFileSize(header_->FileSize() + diff);
+  }
+}
+
 // Orders code items according to specified class data ordering.
 // NOTE: If the section following the code items is byte aligned, the last code item is left in
 // place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
@@ -1686,6 +1791,7 @@
 }
 
 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
+  LayoutStringData(dex_file);
   std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
   int32_t diff = LayoutCodeItems(new_class_data_order);
   // Move sections after ClassData by diff bytes.
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index f26b423..69117ad 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -109,6 +109,7 @@
 
   std::vector<dex_ir::ClassData*> LayoutClassDefsAndClassData(const DexFile* dex_file);
   int32_t LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order);
+  void LayoutStringData(const DexFile* dex_file);
   bool IsNextSectionCodeItemAligned(uint32_t offset);
   template<class T> void FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map, uint32_t diff);
   void FixupSections(uint32_t offset, uint32_t diff);
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index bd6548e..4ef48ff 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -43,18 +43,6 @@
 static const char kDexFileLayoutInputProfile[] =
     "cHJvADAwNAABCwABAAAAAAD1KW3+Y2xhc3Nlcy5kZXgBAA==";
 
-static const char kDexFileLayoutExpectedOutputDex[] =
-    "ZGV4CjAzNQD1KW3+B8NAB0f2A/ZVIBJ0aHrGIqcpVTAUAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAH"
-    "AAAAcAAAAAQAAACMAAAAAQAAAJwAAAAAAAAAAAAAAAMAAACoAAAAAgAAAMAAAAAUAQAAAAEAADAB"
-    "AAA4AQAAQAEAAEgBAABNAQAAUgEAAGYBAAADAAAABAAAAAUAAAAGAAAABgAAAAMAAAAAAAAAAAAA"
-    "AAAAAAABAAAAAAAAAAIAAAAAAAAAAQAAAAAAAAACAAAAAAAAAAIAAAAAAAAAdQEAAAAAAAAAAAAA"
-    "AAAAAAIAAAAAAAAAAQAAAAAAAAB/AQAAAAAAAAEAAQABAAAAbwEAAAQAAABwEAIAAAAOAAEAAQAB"
-    "AAAAaQEAAAQAAABwEAIAAAAOAAY8aW5pdD4ABkEuamF2YQAGQi5qYXZhAANMQTsAA0xCOwASTGph"
-    "dmEvbGFuZy9PYmplY3Q7AAFWAAQABw48AAQABw48AAAAAQABgIAEgAIAAAEAAICABJgCAAAACwAA"
-    "AAAAAAABAAAAAAAAAAEAAAAHAAAAcAAAAAIAAAAEAAAAjAAAAAMAAAABAAAAnAAAAAUAAAADAAAA"
-    "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
-    "AAAAdQEAAAAQAAABAAAAjAEAAA==";
-
 // Dex file with catch handler unreferenced by try blocks.
 // Constructed by building a dex file with try/catch blocks and hex editing.
 static const char kUnreferencedCatchHandlerInputDex[] =
@@ -314,26 +302,21 @@
     WriteFileBase64(kDexFileLayoutInputDex, dex_file.c_str());
     std::string profile_file = tmp_dir + "primary.prof";
     WriteFileBase64(kDexFileLayoutInputProfile, profile_file.c_str());
-    std::string expected_output = tmp_dir + "expected.dex";
-    WriteFileBase64(kDexFileLayoutExpectedOutputDex, expected_output.c_str());
     std::string output_dex = tmp_dir + "classes.dex.new";
 
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
     std::vector<std::string> dexlayout_exec_argv =
-        { dexlayout, "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
+        { dexlayout, "-v", "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
     if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
       return false;
     }
-    std::vector<std::string> diff_exec_argv =
-        { "/usr/bin/diff", expected_output, output_dex };
-    if (!::art::Exec(diff_exec_argv, error_msg)) {
-      return false;
-    }
+
+    // -v makes sure that the layout did not corrupt the dex file.
 
     std::vector<std::string> rm_exec_argv =
-        { "/bin/rm", dex_file, profile_file, expected_output, output_dex };
+        { "/bin/rm", dex_file, profile_file, output_dex };
     if (!::art::Exec(rm_exec_argv, error_msg)) {
       return false;
     }