Dump more dex file data in oatdump

Dump some statistics for each dex file along side with strings loaded
from code and dex code bytes.

Sample output:
Cumulative dex file data
Num string ids: 202809
Num method ids: 320464
Num field ids: 162822
Num type ids: 68151
Num class defs: 48061
Unique strings loaded from dex code: 51049
Total strings loaded from dex code: 106651
Number of unique dex code items: 247929
Total number of dex code bytes: 11090574

Added content testing to oat dump test. No significant slowdown.

TEST: test-art-host
Bug: 29462018

Change-Id: I60effd3087d8c427eda4ee26431d5d77165b3939
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 477c4f1..cc38f3e 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -21,9 +21,9 @@
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
+#include "string_reference.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm/assembler_thumb2.h"
-#include "utils/string_reference.h"
 #include "utils/type_reference.h"
 
 namespace art {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d4bf695..c2f055a 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -24,8 +24,8 @@
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "utils/string_reference.h"
 #include "utils/type_reference.h"
 #include "vixl/a64/disasm-a64.h"
 #include "vixl/a64/macro-assembler-a64.h"
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
index df5080e..0fd9e5b 100644
--- a/compiler/utils/string_reference_test.cc
+++ b/compiler/utils/string_reference_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "utils/string_reference.h"
+#include "string_reference.h"
 
 #include <memory>
 
diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h
index bd0739f..d0c1656 100644
--- a/compiler/utils/type_reference.h
+++ b/compiler/utils/type_reference.h
@@ -20,7 +20,7 @@
 #include <stdint.h>
 
 #include "base/logging.h"
-#include "utils/string_reference.h"
+#include "string_reference.h"
 
 namespace art {
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 3f031a3..64349b5 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -56,8 +56,9 @@
 #include "os.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
-#include "stack_map.h"
 #include "ScopedLocalRef.h"
+#include "stack_map.h"
+#include "string_reference.h"
 #include "thread_list.h"
 #include "type_lookup_table.h"
 #include "verifier/method_verifier.h"
@@ -447,6 +448,28 @@
       os << StringPrintf("0x%08x\n\n", resolved_addr2instr_);
     }
 
+    // Dumping the dex file overview is compact enough to do even if header only.
+    DexFileData cumulative;
+    for (size_t i = 0; i < oat_dex_files_.size(); i++) {
+      const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
+      CHECK(oat_dex_file != nullptr);
+      std::string error_msg;
+      const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
+      if (dex_file == nullptr) {
+        os << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation() << "': "
+           << error_msg;
+        continue;
+      }
+      DexFileData data(*dex_file);
+      os << "Dex file data for " << dex_file->GetLocation() << "\n";
+      data.Dump(os);
+      os << "\n";
+      cumulative.Add(data);
+    }
+    os << "Cumulative dex file data\n";
+    cumulative.Dump(os);
+    os << "\n";
+
     if (!options_.dump_header_only_) {
       for (size_t i = 0; i < oat_dex_files_.size(); i++) {
         const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
@@ -568,6 +591,122 @@
     offsets_.insert(oat_method.GetVmapTableOffset());
   }
 
+  // Dex file data, may be for multiple different dex files.
+  class DexFileData {
+   public:
+    DexFileData() {}
+
+    explicit DexFileData(const DexFile& dex_file)
+        : num_string_ids_(dex_file.NumStringIds()),
+          num_method_ids_(dex_file.NumMethodIds()),
+          num_field_ids_(dex_file.NumFieldIds()),
+          num_type_ids_(dex_file.NumTypeIds()),
+          num_class_defs_(dex_file.NumClassDefs()) {
+      for (size_t class_def_index = 0; class_def_index < num_class_defs_; ++class_def_index) {
+        const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
+        WalkClass(dex_file, class_def);
+      }
+    }
+
+    void Add(const DexFileData& other) {
+      AddAll(unique_string_ids_from_code_, other.unique_string_ids_from_code_);
+      num_string_ids_from_code_ += other.num_string_ids_from_code_;
+      AddAll(dex_code_item_ptrs_, other.dex_code_item_ptrs_);
+      dex_code_bytes_ += other.dex_code_bytes_;
+      num_string_ids_ += other.num_string_ids_;
+      num_method_ids_ += other.num_method_ids_;
+      num_field_ids_ += other.num_field_ids_;
+      num_type_ids_ += other.num_type_ids_;
+      num_class_defs_ += other.num_class_defs_;
+    }
+
+    void Dump(std::ostream& os) {
+      os << "Num string ids: " << num_string_ids_ << "\n";
+      os << "Num method ids: " << num_method_ids_ << "\n";
+      os << "Num field ids: " << num_field_ids_ << "\n";
+      os << "Num type ids: " << num_type_ids_ << "\n";
+      os << "Num class defs: " << num_class_defs_ << "\n";
+      os << "Unique strings loaded from dex code: " << unique_string_ids_from_code_.size() << "\n";
+      os << "Total strings loaded from dex code: " << num_string_ids_from_code_ << "\n";
+      os << "Number of unique dex code items: " << dex_code_item_ptrs_.size() << "\n";
+      os << "Total number of dex code bytes: " << dex_code_bytes_ << "\n";
+    }
+
+  private:
+    void WalkClass(const DexFile& dex_file, const DexFile::ClassDef& class_def) {
+      const uint8_t* class_data = dex_file.GetClassData(class_def);
+      if (class_data == nullptr) {  // empty class such as a marker interface?
+        return;
+      }
+      ClassDataItemIterator it(dex_file, class_data);
+      SkipAllFields(it);
+      while (it.HasNextDirectMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      while (it.HasNextVirtualMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+
+    void WalkCodeItem(const DexFile& dex_file, const DexFile::CodeItem* code_item) {
+      if (code_item == nullptr) {
+        return;
+      }
+      const size_t code_item_size = code_item->insns_size_in_code_units_;
+      const uint16_t* code_ptr = code_item->insns_;
+      const uint16_t* code_end = code_item->insns_ + code_item_size;
+
+      // If we inserted a new dex code item pointer, add to total code bytes.
+      if (dex_code_item_ptrs_.insert(code_ptr).second) {
+        dex_code_bytes_ += code_item_size * sizeof(code_ptr[0]);
+      }
+
+      while (code_ptr < code_end) {
+        const Instruction* inst = Instruction::At(code_ptr);
+        switch (inst->Opcode()) {
+          case Instruction::CONST_STRING: {
+            const uint32_t string_index = inst->VRegB_21c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          case Instruction::CONST_STRING_JUMBO: {
+            const uint32_t string_index = inst->VRegB_31c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          default:
+            break;
+        }
+
+        code_ptr += inst->SizeInCodeUnits();
+      }
+    }
+
+    // Unique string ids loaded from dex code.
+    std::set<StringReference, StringReferenceComparator> unique_string_ids_from_code_;
+
+    // Total string ids loaded from dex code.
+    size_t num_string_ids_from_code_ = 0;
+
+    // Unique code pointers.
+    std::set<const void*> dex_code_item_ptrs_;
+
+    // Total "unique" dex code bytes.
+    size_t dex_code_bytes_ = 0;
+
+    // Other dex ids.
+    size_t num_string_ids_ = 0;
+    size_t num_method_ids_ = 0;
+    size_t num_field_ids_ = 0;
+    size_t num_type_ids_ = 0;
+    size_t num_class_defs_ = 0;
+  };
+
   bool DumpOatDexFile(std::ostream& os, const OatFile::OatDexFile& oat_dex_file) {
     bool success = true;
     bool stop_analysis = false;
@@ -578,7 +717,6 @@
     // Print embedded dex file data range.
     const uint8_t* const oat_file_begin = oat_dex_file.GetOatFile()->Begin();
     const uint8_t* const dex_file_pointer = oat_dex_file.GetDexFilePointer();
-    std::set<uint32_t> string_ids;
     uint32_t dex_offset = dchecked_integral_cast<uint32_t>(dex_file_pointer - oat_file_begin);
     os << StringPrintf("dex-file: 0x%08x..0x%08x\n",
                        dex_offset,
@@ -623,8 +761,10 @@
          << " (" << oat_class.GetStatus() << ")"
          << " (" << oat_class.GetType() << ")\n";
       // TODO: include bitmap here if type is kOatClassSomeCompiled?
-      if (options_.list_classes_) continue;
-      if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis, string_ids)) {
+      if (options_.list_classes_) {
+        continue;
+      }
+      if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis)) {
         success = false;
       }
       if (stop_analysis) {
@@ -632,7 +772,7 @@
         return success;
       }
     }
-    os << "Number of unique strings loaded from dex code: " << string_ids.size() << "\n";
+    os << "\n";
     os << std::flush;
     return success;
   }
@@ -726,8 +866,7 @@
 
   bool DumpOatClass(VariableIndentationOutputStream* vios,
                     const OatFile::OatClass& oat_class, const DexFile& dex_file,
-                    const DexFile::ClassDef& class_def, bool* stop_analysis,
-                    std::set<uint32_t>& string_ids) {
+                    const DexFile::ClassDef& class_def, bool* stop_analysis) {
     bool success = true;
     bool addr_found = false;
     const uint8_t* class_data = dex_file.GetClassData(class_def);
@@ -741,7 +880,7 @@
     while (it.HasNextDirectMethod()) {
       if (!DumpOatMethod(vios, class_def, class_method_index, oat_class, dex_file,
                          it.GetMemberIndex(), it.GetMethodCodeItem(),
-                         it.GetRawMemberAccessFlags(), &addr_found, string_ids)) {
+                         it.GetRawMemberAccessFlags(), &addr_found)) {
         success = false;
       }
       if (addr_found) {
@@ -754,7 +893,7 @@
     while (it.HasNextVirtualMethod()) {
       if (!DumpOatMethod(vios, class_def, class_method_index, oat_class, dex_file,
                          it.GetMemberIndex(), it.GetMethodCodeItem(),
-                         it.GetRawMemberAccessFlags(), &addr_found, string_ids)) {
+                         it.GetRawMemberAccessFlags(), &addr_found)) {
         success = false;
       }
       if (addr_found) {
@@ -779,35 +918,9 @@
                      uint32_t class_method_index,
                      const OatFile::OatClass& oat_class, const DexFile& dex_file,
                      uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
-                     uint32_t method_access_flags, bool* addr_found,
-                     std::set<uint32_t>& string_ids) {
+                     uint32_t method_access_flags, bool* addr_found) {
     bool success = true;
 
-    if (code_item != nullptr) {
-      const uint16_t* code_ptr = code_item->insns_;
-      const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
-
-      while (code_ptr < code_end) {
-        const Instruction* inst = Instruction::At(code_ptr);
-        switch (inst->Opcode()) {
-          case Instruction::CONST_STRING: {
-            uint32_t string_index = inst->VRegB_21c();
-            string_ids.insert(string_index);
-            break;
-          }
-          case Instruction::CONST_STRING_JUMBO: {
-            uint32_t string_index = inst->VRegB_31c();
-            string_ids.insert(string_index);
-            break;
-          }
-
-          default:
-            break;
-        }
-
-        code_ptr += inst->SizeInCodeUnits();
-      }
-    }
     // TODO: Support regex
     std::string method_name = dex_file.GetMethodName(dex_file.GetMethodId(dex_method_idx));
     if (method_name.find(options_.method_filter_) == std::string::npos) {
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index c7ced8a..004defe 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
+#include <sstream>
 #include <string>
 #include <vector>
-#include <sstream>
 
 #include "common_runtime_test.h"
 
 #include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
 #include "runtime/arch/instruction_set.h"
 #include "runtime/gc/heap.h"
 #include "runtime/gc/space/image_space.h"
@@ -58,26 +59,127 @@
   };
 
   // Run the test with custom arguments.
-  bool Exec(Mode mode, const std::vector<std::string>& args, std::string* error_msg) {
+  bool Exec(Mode mode,
+            const std::vector<std::string>& args,
+            bool list_only,
+            std::string* error_msg) {
     std::string file_path = GetOatDumpFilePath();
 
     EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
 
+    // ScratchFile scratch;
     std::vector<std::string> exec_argv = { file_path };
+    std::vector<std::string> expected_prefixes;
     if (mode == kModeSymbolize) {
       exec_argv.push_back("--symbolize=" + core_oat_location_);
       exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
-    } else if (mode == kModeArt) {
-      exec_argv.push_back("--image=" + core_art_location_);
-      exec_argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
-      exec_argv.push_back("--output=/dev/null");
     } else {
-      CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
-      exec_argv.push_back("--oat-file=" + core_oat_location_);
-      exec_argv.push_back("--output=/dev/null");
+      expected_prefixes.push_back("Dex file data for");
+      expected_prefixes.push_back("Num string ids:");
+      expected_prefixes.push_back("Num field ids:");
+      expected_prefixes.push_back("Num method ids:");
+      expected_prefixes.push_back("LOCATION:");
+      expected_prefixes.push_back("MAGIC:");
+      expected_prefixes.push_back("DEX FILE COUNT:");
+      if (!list_only) {
+        // Code and dex code do not show up if list only.
+        expected_prefixes.push_back("DEX CODE:");
+        expected_prefixes.push_back("CODE:");
+      }
+      if (mode == kModeArt) {
+        exec_argv.push_back("--image=" + core_art_location_);
+        exec_argv.push_back("--instruction-set=" + std::string(
+            GetInstructionSetString(kRuntimeISA)));
+        expected_prefixes.push_back("IMAGE LOCATION:");
+        expected_prefixes.push_back("IMAGE BEGIN:");
+        expected_prefixes.push_back("kDexCaches:");
+      } else {
+        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
+        exec_argv.push_back("--oat-file=" + core_oat_location_);
+      }
     }
     exec_argv.insert(exec_argv.end(), args.begin(), args.end());
-    return ::art::Exec(exec_argv, error_msg);
+
+    bool result = true;
+    // We must set --android-root.
+    int link[2];
+    if (pipe(link) == -1) {
+      return false;
+    }
+
+    const pid_t pid = fork();
+    if (pid == -1) {
+      return false;
+    }
+
+    if (pid == 0) {
+      dup2(link[1], STDOUT_FILENO);
+      close(link[0]);
+      close(link[1]);
+      exit(::art::Exec(exec_argv, error_msg) ? 0 : 1);
+    } else {
+      close(link[1]);
+      static const size_t kLineMax = 256;
+      char line[kLineMax] = {};
+      size_t line_len = 0;
+      size_t total = 0;
+      std::vector<bool> found(expected_prefixes.size(), false);
+      while (true) {
+        while (true) {
+          size_t spaces = 0;
+          // Trim spaces at the start of the line.
+          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
+          if (spaces > 0) {
+            line_len -= spaces;
+            memmove(&line[0], &line[spaces], line_len);
+          }
+          ssize_t bytes_read =
+              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
+          if (bytes_read <= 0) {
+            break;
+          }
+          line_len += bytes_read;
+          total += bytes_read;
+        }
+        if (line_len == 0) {
+          break;
+        }
+        // Check contents.
+        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+          const std::string& expected = expected_prefixes[i];
+          if (!found[i] &&
+              line_len >= expected.length() &&
+              memcmp(line, expected.c_str(), expected.length()) == 0) {
+            found[i] = true;
+          }
+        }
+        // Skip to next line.
+        size_t next_line = 0;
+        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
+        line_len -= next_line + 1;
+        memmove(&line[0], &line[next_line + 1], line_len);
+      }
+      if (mode == kModeSymbolize) {
+        EXPECT_EQ(total, 0u);
+      } else {
+        EXPECT_GT(total, 0u);
+      }
+      LOG(INFO) << "Processed bytes " << total;
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        result = (status == 0);
+      }
+
+      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+        if (!found[i]) {
+          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
+          result = false;
+        }
+      }
+    }
+
+    return result;
   }
 
  private:
@@ -89,37 +191,37 @@
 #if !defined(__arm__) && !defined(__mips__)
 TEST_F(OatDumpTest, TestImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestOatImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeOat, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeOat, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDumpVmap) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDisassemble) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListClasses) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListMethods) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestSymbolize) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeSymbolize, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeSymbolize, {}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 #endif
 }  // namespace art
diff --git a/compiler/utils/string_reference.h b/runtime/string_reference.h
similarity index 84%
rename from compiler/utils/string_reference.h
rename to runtime/string_reference.h
index e4c34ca..c75c218 100644
--- a/compiler/utils/string_reference.h
+++ b/runtime/string_reference.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_UTILS_STRING_REFERENCE_H_
-#define ART_COMPILER_UTILS_STRING_REFERENCE_H_
+#ifndef ART_RUNTIME_STRING_REFERENCE_H_
+#define ART_RUNTIME_STRING_REFERENCE_H_
 
 #include <stdint.h>
 
@@ -37,6 +37,16 @@
   uint32_t string_index;
 };
 
+// Compare only the reference and not the string contents.
+struct StringReferenceComparator {
+  bool operator()(const StringReference& a, const StringReference& b) {
+    if (a.dex_file != b.dex_file) {
+      return a.dex_file < b.dex_file;
+    }
+    return a.string_index < b.string_index;
+  }
+};
+
 // Compare the actual referenced string values. Used for string reference deduplication.
 struct StringReferenceValueComparator {
   bool operator()(StringReference sr1, StringReference sr2) const {
@@ -62,4 +72,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_UTILS_STRING_REFERENCE_H_
+#endif  // ART_RUNTIME_STRING_REFERENCE_H_
diff --git a/runtime/utils.h b/runtime/utils.h
index c1e88a4..b2746ee 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -382,13 +382,19 @@
 #endif
 
 template <typename T>
-T GetRandomNumber(T min, T max) {
+static T GetRandomNumber(T min, T max) {
   CHECK_LT(min, max);
   std::uniform_int_distribution<T> dist(min, max);
   RNG rng;
   return dist(rng);
 }
 
+// All of the elements from one container to another.
+template <typename Dest, typename Src>
+static void AddAll(Dest& dest, const Src& src) {
+  dest.insert(src.begin(), src.end());
+}
+
 // Return the file size in bytes or -1 if the file does not exists.
 int64_t GetFileSizeBytes(const std::string& filename);