Simpleperf: support reading symbols from .gnu_debugdata.

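As described in https://sourceware.org/gdb/onlinedocs/gdb/MiniDebugInfo.html,
ELF files can store mini debug information in a .gnu_debugdata section.
The section content is xz-compressed, so simpleperf links liblzma, decompresses
it and reads symbols from the embedded ELF image.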

Bug: 27744639

Change-Id: If4a53a4a1332824388ee309ac63a008dc5cf8d5c
diff --git a/simpleperf/Android.mk b/simpleperf/Android.mk
index 1ec496e..2d01049 100644
--- a/simpleperf/Android.mk
+++ b/simpleperf/Android.mk
@@ -39,6 +39,7 @@
   libLLVM \
 
 simpleperf_static_libraries_target := \
+  liblzma \
   libziparchive \
   libz \
 
@@ -75,6 +76,7 @@
   libziparchive-host \
   libbase \
   liblog \
+  liblzma \
   libz \
   libutils \
 
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
index e27f71e..bf1ff46 100644
--- a/simpleperf/cmd_report_test.cpp
+++ b/simpleperf/cmd_report_test.cpp
@@ -73,6 +73,12 @@
   ASSERT_NE(content.find("GlobalFunc"), std::string::npos);
 }
 
+TEST_F(ReportCommandTest, report_symbol_from_elf_file_with_mini_debug_info) {
+  Report(PERF_DATA_WITH_MINI_DEBUG_INFO);  // Input is the mini-debug-info test recording.
+  ASSERT_TRUE(success);
+  ASSERT_NE(content.find("GlobalFunc"), std::string::npos);  // Symbol name must be in content.
+}
+
 TEST_F(ReportCommandTest, sort_option_pid) {
   Report(PERF_DATA, {"--sort", "pid"});
   ASSERT_TRUE(success);
diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h
index 02363d6..e485e7b 100644
--- a/simpleperf/get_test_data.h
+++ b/simpleperf/get_test_data.h
@@ -29,8 +29,10 @@
 static const std::string PERF_DATA = "perf.data";
 static const std::string CALLGRAPH_FP_PERF_DATA = "perf_g_fp.data";
 static const std::string BRANCH_PERF_DATA = "perf_b.data";
+static const std::string PERF_DATA_WITH_MINI_DEBUG_INFO = "perf_with_mini_debug_info.data";
 
 static const std::string ELF_FILE = "elf";
+static const std::string ELF_FILE_WITH_MINI_DEBUG_INFO = "elf_with_mini_debug_info";
 
 static const std::string APK_FILE = "data/app/com.example.hellojni-1/base.apk";
 static const std::string NATIVELIB_IN_APK = "lib/arm64-v8a/libhello-jni.so";
diff --git a/simpleperf/read_elf.cpp b/simpleperf/read_elf.cpp
index 05b06aa..7ba6369 100644
--- a/simpleperf/read_elf.cpp
+++ b/simpleperf/read_elf.cpp
@@ -101,17 +101,17 @@
 }
 
 template <class ELFT>
-bool GetBuildIdFromELFFile(const llvm::object::ELFFile<ELFT>* elf, BuildId* build_id) {
-  for (auto section_iterator = elf->section_begin(); section_iterator != elf->section_end();
-       ++section_iterator) {
-    if (section_iterator->sh_type == llvm::ELF::SHT_NOTE) {
-      auto contents = elf->getSectionContents(&*section_iterator);
-      if (contents.getError()) {
+bool GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, BuildId* build_id) {
+  for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
+    const llvm::object::ELFSectionRef& section_ref = *it;
+    if (section_ref.getType() == llvm::ELF::SHT_NOTE) {
+      llvm::StringRef data;
+      if (it->getContents(data)) {
         LOG(DEBUG) << "read note section error";
         continue;
       }
-      if (GetBuildIdFromNoteSection(reinterpret_cast<const char*>(contents->data()),
-                                    contents->size(), build_id)) {
+      if (GetBuildIdFromNoteSection(reinterpret_cast<const char*>(data.data()),
+                                    data.size(), build_id)) {
         return true;
       }
     }
@@ -122,9 +122,9 @@
 static bool GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {
   bool result = false;
   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
-    result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
+    result = GetBuildIdFromELFFile(elf, build_id);
   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
-    result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
+    result = GetBuildIdFromELFFile(elf, build_id);
   } else {
     LOG(ERROR) << "unknown elf format in file " << obj->getFileName().data();
     return false;
@@ -180,6 +180,23 @@
   return ret;
 }
 
+static BinaryRet OpenObjectFileFromString(const std::string& s, const std::string& content_name) {
+  BinaryRet ret;  // Parses the in-memory image s; content_name is only used in log messages.
+  auto buffer = llvm::MemoryBuffer::getMemBuffer(s);  // NOTE(review): presumably no copy, so s must outlive ret - confirm.
+  auto binary_or_err = llvm::object::createBinary(buffer->getMemBufferRef());
+  if (!binary_or_err) {
+    LOG(ERROR) << content_name << " is not a binary file: " << binary_or_err.getError().message();
+    return ret;  // ret is still default-constructed here.
+  }
+  ret.binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
+                                                                std::move(buffer));  // Owns binary and buffer.
+  ret.obj = llvm::dyn_cast<llvm::object::ObjectFile>(ret.binary.getBinary());
+  if (ret.obj == nullptr) {  // The binary was parsed but is not an ObjectFile.
+    LOG(ERROR) << content_name << " is not an object file";
+  }
+  return ret;  // Callers must check ret.obj before using it.
+}
+
 bool GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
   if (!IsValidElfPath(filename)) {
     return false;
@@ -198,6 +215,31 @@
   return GetBuildIdFromObjectFile(ret.obj, build_id);
 }
 
+template <class ELFT>
+bool ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, const std::string& section_name,
+                            std::string* content, bool report_error = true) {  // Copies the named section into *content.
+  for (llvm::object::section_iterator it = elf->section_begin(); it != elf->section_end(); ++it) {
+    llvm::StringRef name;
+    if (it->getName(name) || name != section_name) {  // Skip unreadable names and non-matching sections.
+      continue;
+    }
+    llvm::StringRef data;
+    std::error_code err = it->getContents(data);
+    if (err) {
+      if (report_error) {  // Pass report_error = false to probe for a section without logging.
+        LOG(ERROR) << "failed to read section " << section_name << ": " << err;
+      }
+      return false;
+    }
+    *content = data;  // Replaces whatever *content held before.
+    return true;  // Only the first section with a matching name is used.
+  }
+  if (report_error) {
+    LOG(ERROR) << "can't find section " << section_name;
+  }
+  return false;
+}
+
 bool IsArmMappingSymbol(const char* name) {
   // Mapping symbols in arm, which are described in "ELF for ARM Architecture" and
   // "ELF for ARM 64-bit Architecture". The regular expression to match mapping symbol
@@ -205,48 +247,41 @@
   return name[0] == '$' && strchr("adtx", name[1]) != nullptr && (name[2] == '\0' || name[2] == '.');
 }
 
-template <class ELFT>
-void ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf_obj,
-                             std::function<void(const ElfFileSymbol&)> callback) {
-  auto elf = elf_obj->getELFFile();
-  bool is_arm = (elf->getHeader()->e_machine == llvm::ELF::EM_ARM ||
-                 elf->getHeader()->e_machine == llvm::ELF::EM_AARCH64);
-  auto begin = elf_obj->symbol_begin();
-  auto end = elf_obj->symbol_end();
-  if (begin == end) {
-    begin = elf_obj->dynamic_symbol_begin();
-    end = elf_obj->dynamic_symbol_end();
-  }
-  for (; begin != end; ++begin) {
+void ReadSymbolTable(llvm::object::symbol_iterator sym_begin,
+                     llvm::object::symbol_iterator sym_end,
+                     std::function<void(const ElfFileSymbol&)> callback,
+                     bool is_arm) {
+  for (; sym_begin != sym_end; ++sym_begin) {
     ElfFileSymbol symbol;
-    auto elf_symbol = static_cast<const llvm::object::ELFSymbolRef*>(&*begin);
-    auto section_it = elf_symbol->getSection();
-    if (!section_it) {
+    auto symbol_ref = static_cast<const llvm::object::ELFSymbolRef*>(&*sym_begin);
+    llvm::ErrorOr<llvm::object::section_iterator> section_it_or_err = symbol_ref->getSection();
+    if (!section_it_or_err) {
       continue;
     }
-    llvm::StringRef section_name;
-    if (section_it.get()->getName(section_name) || section_name.empty()) {
-      continue;
-    }
-    if (section_name.str() == ".text") {
-      symbol.is_in_text_section = true;
-    }
 
-    auto symbol_name = elf_symbol->getName();
-    if (!symbol_name || symbol_name.get().empty()) {
+    llvm::StringRef section_name;
+    if (section_it_or_err.get()->getName(section_name) || section_name.empty()) {
       continue;
     }
-    symbol.name = symbol_name.get();
-    symbol.vaddr = elf_symbol->getValue();
+    if (section_name == ".text") {
+      symbol.is_in_text_section = true;
+    }
+    llvm::ErrorOr<llvm::StringRef> symbol_name_or_err = symbol_ref->getName();
+    if (!symbol_name_or_err || symbol_name_or_err.get().empty()) {
+      continue;
+    }
+
+    symbol.name = symbol_name_or_err.get();
+    symbol.vaddr = symbol_ref->getValue();
     if ((symbol.vaddr & 1) != 0 && is_arm) {
       // Arm sets bit 0 to mark it as thumb code, remove the flag.
       symbol.vaddr &= ~1;
     }
-    symbol.len = elf_symbol->getSize();
-    int type = elf_symbol->getELFType();
-    if (type == llvm::ELF::STT_FUNC) {
+    symbol.len = symbol_ref->getSize();
+    llvm::object::SymbolRef::Type symbol_type = symbol_ref->getType();
+    if (symbol_type == llvm::object::SymbolRef::ST_Function) {
       symbol.is_func = true;
-    } else if (type == llvm::ELF::STT_NOTYPE) {
+    } else if (symbol_type == llvm::object::SymbolRef::ST_Unknown) {
       if (symbol.is_in_text_section) {
         symbol.is_label = true;
         if (is_arm) {
@@ -265,6 +300,34 @@
   }
 }
 
+template <class ELFT>
+void ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf,
+                             std::function<void(const ElfFileSymbol&)> callback) {  // Passes symbols read from elf to callback.
+  auto machine = elf->getELFFile()->getHeader()->e_machine;
+  bool is_arm = (machine == llvm::ELF::EM_ARM || machine == llvm::ELF::EM_AARCH64);
+  if (elf->symbol_begin() != elf->symbol_end()) {  // Use the regular symbol table when it is not empty.
+    ReadSymbolTable(elf->symbol_begin(), elf->symbol_end(), callback, is_arm);
+  } else if (elf->dynamic_symbol_begin()->getRawDataRefImpl() != llvm::object::DataRefImpl()) {  // Presumably "has a dynamic symbol table" - confirm.
+    ReadSymbolTable(elf->dynamic_symbol_begin(), elf->dynamic_symbol_end(), callback, is_arm);
+  }
+  std::string debugdata;
+  if (ReadSectionFromELFFile(elf, ".gnu_debugdata", &debugdata, false)) {  // false: a missing section is not logged.
+    LOG(VERBOSE) << "Read .gnu_debugdata from " << elf->getFileName().str();
+    std::string decompressed_data;  // Stays alive while ret, which is opened from it, is in use below.
+    if (XzDecompress(debugdata, &decompressed_data)) {
+      std::string content_name = std::string(".gnu_debugdata in ") + elf->getFileName().str();
+      BinaryRet ret = OpenObjectFileFromString(decompressed_data, content_name);
+      if (ret.obj != nullptr) {
+        if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {  // This elf shadows the parameter.
+          ParseSymbolsFromELFFile(elf, callback);  // Recurse into the embedded ELF image.
+        } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
+          ParseSymbolsFromELFFile(elf, callback);
+        }
+      }
+    }
+  }
+}
+
 bool MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id,
                   const std::string& debug_filename) {
   if (expected_build_id.IsEmpty()) {
@@ -294,6 +357,7 @@
 bool ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
                                      uint32_t file_size, const BuildId& expected_build_id,
                                      std::function<void(const ElfFileSymbol&)> callback) {
+  LOG(VERBOSE) << "Parse symbols from file " << filename;
   BinaryRet ret = OpenObjectFile(filename, file_offset, file_size);
   if (ret.obj == nullptr || !MatchBuildId(ret.obj, expected_build_id, filename)) {
     return false;
@@ -354,25 +418,6 @@
   return result;
 }
 
-template <class ELFT>
-bool ReadSectionFromELFFile(const llvm::object::ELFFile<ELFT>* elf, const std::string& section_name,
-                            std::string* content) {
-  for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
-    auto name_or_err = elf->getSectionName(&*it);
-    if (name_or_err && *name_or_err == section_name) {
-      auto data_or_err = elf->getSectionContents(&*it);
-      if (!data_or_err) {
-        LOG(ERROR) << "failed to read section " << section_name;
-        return false;
-      }
-      content->append(data_or_err->begin(), data_or_err->end());
-      return true;
-    }
-  }
-  LOG(ERROR) << "can't find section " << section_name;
-  return false;
-}
-
 bool ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
                             std::string* content) {
   if (!IsValidElfPath(filename)) {
@@ -384,9 +429,9 @@
   }
   bool result = false;
   if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {
-    result = ReadSectionFromELFFile(elf->getELFFile(), section_name, content);
+    result = ReadSectionFromELFFile(elf, section_name, content);
   } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
-    result = ReadSectionFromELFFile(elf->getELFFile(), section_name, content);
+    result = ReadSectionFromELFFile(elf, section_name, content);
   } else {
     LOG(ERROR) << "unknown elf format in file" << filename;
     return false;
diff --git a/simpleperf/read_elf_test.cpp b/simpleperf/read_elf_test.cpp
index 929540f..f2649e0 100644
--- a/simpleperf/read_elf_test.cpp
+++ b/simpleperf/read_elf_test.cpp
@@ -20,6 +20,7 @@
 
 #include <map>
 #include "get_test_data.h"
+#include "test_util.h"
 
 TEST(read_elf, GetBuildIdFromElfFile) {
   BuildId build_id;
@@ -38,16 +39,24 @@
   (*symbols)[symbol.name] = symbol;
 }
 
-void CheckElfFileSymbols(const std::map<std::string, ElfFileSymbol>& symbols) {
+static void CheckGlobalVariableSymbols(const std::map<std::string, ElfFileSymbol>& symbols) {
   auto pos = symbols.find("GlobalVar");
   ASSERT_NE(pos, symbols.end());
   ASSERT_FALSE(pos->second.is_func);
-  pos = symbols.find("GlobalFunc");
+}
+
+static void CheckFunctionSymbols(const std::map<std::string, ElfFileSymbol>& symbols) {
+  auto pos = symbols.find("GlobalFunc");
   ASSERT_NE(pos, symbols.end());
   ASSERT_TRUE(pos->second.is_func);
   ASSERT_TRUE(pos->second.is_in_text_section);
 }
 
+void CheckElfFileSymbols(const std::map<std::string, ElfFileSymbol>& symbols) {
+  CheckGlobalVariableSymbols(symbols);
+  CheckFunctionSymbols(symbols);
+}
+
 TEST(read_elf, parse_symbols_from_elf_file_with_correct_build_id) {
   std::map<std::string, ElfFileSymbol> symbols;
   ASSERT_TRUE(ParseSymbolsFromElfFile(GetTestData(ELF_FILE), elf_file_build_id,
@@ -77,6 +86,13 @@
   CheckElfFileSymbols(symbols);
 }
 
+TEST(read_elf, ParseSymbolFromMiniDebugInfoElfFile) {
+  std::map<std::string, ElfFileSymbol> symbols;  // Filled by ParseSymbol, keyed by symbol name.
+  ASSERT_TRUE(ParseSymbolsFromElfFile(GetTestData(ELF_FILE_WITH_MINI_DEBUG_INFO), BuildId(),
+                                      std::bind(ParseSymbol, std::placeholders::_1, &symbols)));
+  CheckFunctionSymbols(symbols);  // Only the function symbol is checked for this file.
+}
+
 TEST(read_elf, arm_mapping_symbol) {
   ASSERT_TRUE(IsArmMappingSymbol("$a"));
   ASSERT_FALSE(IsArmMappingSymbol("$b"));
diff --git a/simpleperf/test_util.h b/simpleperf/test_util.h
index cfbe493..61a0ec7 100644
--- a/simpleperf/test_util.h
+++ b/simpleperf/test_util.h
@@ -15,7 +15,9 @@
  */
 
 #include <map>
+#include <memory>
 #include <string>
+#include <vector>
 
 #include "read_elf.h"
 #include "workload.h"
diff --git a/simpleperf/testdata/elf_with_mini_debug_info b/simpleperf/testdata/elf_with_mini_debug_info
new file mode 100644
index 0000000..b3aa967
--- /dev/null
+++ b/simpleperf/testdata/elf_with_mini_debug_info
Binary files differ
diff --git a/simpleperf/testdata/perf_with_mini_debug_info.data b/simpleperf/testdata/perf_with_mini_debug_info.data
new file mode 100644
index 0000000..0b02b3b
--- /dev/null
+++ b/simpleperf/testdata/perf_with_mini_debug_info.data
Binary files differ
diff --git a/simpleperf/utils.cpp b/simpleperf/utils.cpp
index 99e1e98..f2418a6 100644
--- a/simpleperf/utils.cpp
+++ b/simpleperf/utils.cpp
@@ -30,6 +30,10 @@
 #include <android-base/file.h>
 #include <android-base/logging.h>
 
+#include <7zCrc.h>
+#include <Xz.h>
+#include <XzCrc64.h>
+
 void OneTimeFreeAllocator::Clear() {
   for (auto& p : v_) {
     delete[] p;
@@ -180,3 +184,49 @@
   }
   return true;
 }
+
+static void* xz_alloc(void*, size_t size) {  // Allocation hook for ISzAlloc; the first argument is ignored.
+  return malloc(size);
+}
+
+static void xz_free(void*, void* address) {  // Release hook for ISzAlloc; the first argument is ignored.
+  free(address);
+}
+
+// Decompresses the xz stream in compressed_data into *decompressed_data; false on failure.
+bool XzDecompress(const std::string& compressed_data, std::string* decompressed_data) {
+  ISzAlloc alloc;
+  CXzUnpacker state;
+  alloc.Alloc = xz_alloc;
+  alloc.Free = xz_free;
+  XzUnpacker_Construct(&state, &alloc);
+  CrcGenerateTable();
+  Crc64GenerateTable();
+  size_t src_offset = 0;
+  size_t dst_offset = 0;
+  std::string dst(compressed_data.size(), ' ');  // Starting size; doubled on every pass below.
+  for (ECoderStatus status = CODER_STATUS_NOT_FINISHED; status == CODER_STATUS_NOT_FINISHED;) {
+    dst.resize(dst.size() * 2);  // Make room for the next chunk of output.
+    size_t src_remaining = compressed_data.size() - src_offset;
+    size_t dst_remaining = dst.size() - dst_offset;
+    int res = XzUnpacker_Code(&state, reinterpret_cast<Byte*>(&dst[dst_offset]), &dst_remaining,
+                              reinterpret_cast<const Byte*>(&compressed_data[src_offset]),
+                              &src_remaining, CODER_FINISH_ANY, &status);
+    if (res != SZ_OK) {
+      LOG(ERROR) << "LZMA decompression failed with error " << res;
+      XzUnpacker_Free(&state);
+      return false;
+    }
+    src_offset += src_remaining;  // Both counters were updated in place by XzUnpacker_Code.
+    dst_offset += dst_remaining;
+  }
+  bool stream_finished = XzUnpacker_IsStreamWasFinished(&state);  // Read before releasing state.
+  XzUnpacker_Free(&state);
+  if (!stream_finished) {
+    LOG(ERROR) << "LZMA decompression failed due to incomplete stream";
+    return false;  // *decompressed_data is left untouched on every failure path.
+  }
+  dst.resize(dst_offset);  // Drop the unused tail of the output buffer.
+  *decompressed_data = std::move(dst);
+  return true;
+}
diff --git a/simpleperf/utils.h b/simpleperf/utils.h
index 1164b1e..c5c4366 100644
--- a/simpleperf/utils.h
+++ b/simpleperf/utils.h
@@ -119,4 +119,6 @@
 uint64_t GetFileSize(const std::string& filename);
 bool MkdirWithParents(const std::string& path);
 
+bool XzDecompress(const std::string& compressed_data, std::string* decompressed_data);
+
 #endif  // SIMPLE_PERF_UTILS_H_