Simpleperf: improve symbol parsing.

Support c++ symbol demangling, and add option to disable it.
Add option to set symfs dir, so it can report on host.
Add label symbols, and add symbol length fixing for them.
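Add two hacks for ARM ELF symbol parsing: clear the Thumb flag (bit 0)
from symbol start offsets, and ignore two-character '$' labels such as
$t, $d, $x.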

Bug: 19483574

Change-Id: I300d6c007c8634db382d0a50627b9cde1502df89
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
index d6aca57..f33ed49 100644
--- a/simpleperf/cmd_report.cpp
+++ b/simpleperf/cmd_report.cpp
@@ -133,9 +133,11 @@
       : Command("report", "report sampling information in perf.data",
                 "Usage: simpleperf report [options]\n"
                 "    -i <file>     specify path of record file, default is perf.data\n"
+                "    --no-demangle        Don't demangle symbol names.\n"
                 "    --sort key1,key2,... Select the keys to sort and print the report.\n"
                 "                         Possible keys include pid, comm, dso, symbol.\n"
-                "                         Default keys are \"comm,pid,dso\"\n"),
+                "                         Default keys are \"comm,pid,dso\"\n"
+                "    --symfs <dir>  Look for files with symbols relative to this directory.\n"),
         record_filename_("perf.data") {
   }
 
@@ -191,6 +193,8 @@
         return false;
       }
       record_filename_ = args[i];
+    } else if (args[i] == "--no-demangle") {
+      DsoFactory::SetDemangle(false);
     } else if (args[i] == "--sort") {
       if (!NextArgumentOrError(args, &i)) {
         return false;
@@ -205,6 +209,13 @@
           return false;
         }
       }
+    } else if (args[i] == "--symfs") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      if (!DsoFactory::SetSymFsDir(args[i])) {
+        return false;
+      }
     } else {
       ReportUnknownOption(args, i);
       return false;
diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp
index e8cd27c..7d4671c 100644
--- a/simpleperf/dso.cpp
+++ b/simpleperf/dso.cpp
@@ -16,8 +16,11 @@
 
 #include "dso.h"
 
+#include <stdlib.h>
+#include <base/logging.h>
 #include "environment.h"
 #include "read_elf.h"
+#include "utils.h"
 
 bool SymbolComparator::operator()(const std::unique_ptr<SymbolEntry>& symbol1,
                                   const std::unique_ptr<SymbolEntry>& symbol2) {
@@ -39,6 +42,30 @@
   return nullptr;
 }
 
+bool DsoFactory::demangle = true;  // Demangling is enabled unless SetDemangle(false) is called.
+
+void DsoFactory::SetDemangle(bool demangle) {  // Sets the flag LoadDso() checks before demangling.
+  DsoFactory::demangle = demangle;
+}
+
+std::string DsoFactory::symfs_dir;  // Prefix prepended to dso paths when opening ELF files; empty by default.
+
+bool DsoFactory::SetSymFsDir(const std::string& symfs_dir) {  // Returns false (after logging) if rejected.
+  std::string dirname = symfs_dir;
+  if (!dirname.empty() && dirname.back() != '/') {  // Ensure a trailing '/' so dirname + dso_path joins.
+    dirname.push_back('/');
+  }
+  std::vector<std::string> files;
+  std::vector<std::string> subdirs;
+  GetEntriesInDir(symfs_dir, &files, &subdirs);  // NOTE(review): presumably yields nothing for a missing dir - confirm.
+  if (files.empty() && subdirs.empty()) {  // A directory with no listed entries is treated as invalid.
+    LOG(ERROR) << "Invalid symfs_dir '" << symfs_dir << "'";
+    return false;
+  }
+  DsoFactory::symfs_dir = dirname;  // Store the normalized form (with trailing '/').
+  return true;
+}
+
 static bool IsKernelFunctionSymbol(const KernelSymbol& symbol) {
   return (symbol.type == 'T' || symbol.type == 't' || symbol.type == 'W' || symbol.type == 'w');
 }
@@ -53,23 +80,26 @@
   return false;
 }
 
+static void FixupSymbolLength(DsoEntry* dso) {  // Fills in len for every symbol whose len is 0.
+  SymbolEntry* prev_symbol = nullptr;
+  for (auto& symbol : dso->symbols) {  // NOTE(review): assumes iteration in ascending addr order - confirm.
+    if (prev_symbol != nullptr && prev_symbol->len == 0) {
+      prev_symbol->len = symbol->addr - prev_symbol->addr;  // Extend up to the start of the next symbol.
+    }
+    prev_symbol = symbol.get();
+  }
+  if (prev_symbol != nullptr && prev_symbol->len == 0) {
+    prev_symbol->len = ULLONG_MAX - prev_symbol->addr;  // Last symbol has no successor: extend to the max.
+  }
+}
+
 std::unique_ptr<DsoEntry> DsoFactory::LoadKernel() {
   std::unique_ptr<DsoEntry> dso(new DsoEntry);
   dso->path = "[kernel.kallsyms]";
 
   ProcessKernelSymbols("/proc/kallsyms",
                        std::bind(&KernelSymbolCallback, std::placeholders::_1, dso.get()));
-  // Fix symbol.len.
-  auto prev_it = dso->symbols.end();
-  for (auto it = dso->symbols.begin(); it != dso->symbols.end(); ++it) {
-    if (prev_it != dso->symbols.end()) {
-      (*prev_it)->len = (*it)->addr - (*prev_it)->addr;
-    }
-    prev_it = it;
-  }
-  if (prev_it != dso->symbols.end()) {
-    (*prev_it)->len = ULLONG_MAX - (*prev_it)->addr;
-  }
+  FixupSymbolLength(dso.get());
   return dso;
 }
 
@@ -91,19 +121,37 @@
 std::unique_ptr<DsoEntry> DsoFactory::LoadKernelModule(const std::string& dso_path) {
   std::unique_ptr<DsoEntry> dso(new DsoEntry);
   dso->path = dso_path;
-  ParseSymbolsFromElfFile(dso_path, std::bind(ParseSymbolCallback, std::placeholders::_1, dso.get(),
-                                              SymbolFilterForKernelModule));
+  ParseSymbolsFromElfFile(symfs_dir + dso_path, std::bind(ParseSymbolCallback, std::placeholders::_1,
+                                                          dso.get(), SymbolFilterForKernelModule));
+  FixupSymbolLength(dso.get());
   return dso;
 }
 
 static bool SymbolFilterForDso(const ElfFileSymbol& elf_symbol) {
-  return elf_symbol.is_func;
+  return elf_symbol.is_func || (elf_symbol.is_label && elf_symbol.is_in_text_section);
+}
+
+extern "C" char* __cxa_demangle(const char* mangled_name, char* buf, size_t* n, int* status);  // C++ ABI demangler.
+
+static void DemangleInPlace(std::string* name) {  // Replaces *name with its demangled form, if it has one.
+  int status;
+  char* demangled_name = __cxa_demangle(name->c_str(), nullptr, nullptr, &status);  // Null buf: result is malloc'ed.
+  if (status == 0) {  // 0 means success; on failure *name is left unchanged.
+    *name = demangled_name;
+    free(demangled_name);  // The caller owns the buffer returned by __cxa_demangle.
+  }
 }
 
 std::unique_ptr<DsoEntry> DsoFactory::LoadDso(const std::string& dso_path) {
   std::unique_ptr<DsoEntry> dso(new DsoEntry);
   dso->path = dso_path;
-  ParseSymbolsFromElfFile(dso_path, std::bind(ParseSymbolCallback, std::placeholders::_1, dso.get(),
-                                              SymbolFilterForDso));
+  ParseSymbolsFromElfFile(symfs_dir + dso_path, std::bind(ParseSymbolCallback, std::placeholders::_1,
+                                                          dso.get(), SymbolFilterForDso));
+  if (demangle) {
+    for (auto& symbol : dso->symbols) {
+      DemangleInPlace(&symbol->name);
+    }
+  }
+  FixupSymbolLength(dso.get());
   return dso;
 }
diff --git a/simpleperf/dso.h b/simpleperf/dso.h
index f596cd5..2d79c92 100644
--- a/simpleperf/dso.h
+++ b/simpleperf/dso.h
@@ -41,9 +41,15 @@
 
 class DsoFactory {
  public:
+  static void SetDemangle(bool demangle);
+  static bool SetSymFsDir(const std::string& symfs_dir);
   static std::unique_ptr<DsoEntry> LoadKernel();
   static std::unique_ptr<DsoEntry> LoadKernelModule(const std::string& dso_path);
   static std::unique_ptr<DsoEntry> LoadDso(const std::string& dso_path);
+
+ private:
+  static bool demangle;
+  static std::string symfs_dir;
 };
 
 #endif  // SIMPLE_PERF_DSO_H_
diff --git a/simpleperf/read_elf.cpp b/simpleperf/read_elf.cpp
index 6585252..a56a4b2 100644
--- a/simpleperf/read_elf.cpp
+++ b/simpleperf/read_elf.cpp
@@ -117,6 +117,7 @@
 template <class ELFT>
 bool ParseSymbolsFromELFFile(const llvm::object::ELFFile<ELFT>* elf,
                              std::function<void(const ElfFileSymbol&)> callback) {
+  bool is_arm = (elf->getHeader()->e_machine == EM_ARM);
   auto begin = elf->begin_symbols();
   auto end = elf->end_symbols();
   if (begin == end) {
@@ -137,13 +138,6 @@
     if (section_name.getError() || section_name.get().empty()) {
       continue;
     }
-
-    symbol.start_in_file = elf_symbol.st_value - shdr->sh_addr + shdr->sh_offset;
-    symbol.len = elf_symbol.st_size;
-    int type = elf_symbol.getType();
-    if (type & STT_FUNC) {
-      symbol.is_func = true;
-    }
     if (section_name.get() == ".text") {
       symbol.is_in_text_section = true;
     }
@@ -156,6 +150,26 @@
     if (symbol.name.empty()) {
       continue;
     }
+
+    symbol.start_in_file = elf_symbol.st_value - shdr->sh_addr + shdr->sh_offset;
+    if ((symbol.start_in_file & 1) != 0 && is_arm) {
+      // Arm sets bit 0 to mark it as thumb code, remove the flag.
+      symbol.start_in_file &= ~1;
+    }
+    symbol.len = elf_symbol.st_size;
+    int type = elf_symbol.getType();
+    if (type == STT_FUNC) {
+      symbol.is_func = true;
+    } else if (type == STT_NOTYPE) {
+      if (symbol.is_in_text_section) {
+        symbol.is_label = true;
+        // Arm has meaningless labels like $t, $d, $x.
+        if (is_arm && symbol.name.size() == 2 && symbol.name[0] == '$') {
+          symbol.is_label = false;
+        }
+      }
+    }
+
     callback(symbol);
   }
   return true;
diff --git a/simpleperf/read_elf.h b/simpleperf/read_elf.h
index ee90c8a..e21add4 100644
--- a/simpleperf/read_elf.h
+++ b/simpleperf/read_elf.h
@@ -28,6 +28,7 @@
   uint64_t start_in_file;
   uint64_t len;
   bool is_func;
+  bool is_label;
   bool is_in_text_section;
   std::string name;
 };