simpleperf: fix build id check of files in symfs.

In dso.cpp, build_id_map_ should use path_ instead of GetAccessiblePath() as the key.
However, patch https://android-review.googlesource.com/#/c/175654/ wrongly used
GetAccessiblePath() as the key in build_id_map_. This patch fixes the error and add
corresponding test.

Check if file in symfs exists before using it as debug file path.
If the build id of debug file path doesn't match the one in build_id_map_, output
warning to user.

Bug: 28911532
Test: run simpleperf_unit_test.
Change-Id: I21bca508359a492245db4cba5d287005363cd465
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
index 14f4463..48fe0df 100644
--- a/simpleperf/cmd_report_test.cpp
+++ b/simpleperf/cmd_report_test.cpp
@@ -317,6 +317,26 @@
   ASSERT_NE(content.find("VaddrInFile"), std::string::npos);
 }
 
+TEST_F(ReportCommandTest, check_build_id) {
+  Report(PERF_DATA_FOR_BUILD_ID_CHECK,
+         {"--symfs", GetTestData(CORRECT_SYMFS_FOR_BUILD_ID_CHECK)});
+  ASSERT_TRUE(success);
+  ASSERT_NE(content.find("main"), std::string::npos);
+  ASSERT_EXIT(
+      {
+        Report(PERF_DATA_FOR_BUILD_ID_CHECK,
+               {"--symfs", GetTestData(WRONG_SYMFS_FOR_BUILD_ID_CHECK)});
+        if (!success) {
+          exit(1);
+        }
+        if (content.find("main") != std::string::npos) {
+          exit(2);
+        }
+        exit(0);
+      },
+      testing::ExitedWithCode(0), "build id.*mismatch");
+}
+
 #if defined(__linux__)
 
 static std::unique_ptr<Command> RecordCmd() {
diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp
index 45ae124..c10779a 100644
--- a/simpleperf/dso.cpp
+++ b/simpleperf/dso.cpp
@@ -120,8 +120,8 @@
   build_id_map_ = std::move(map);
 }
 
-BuildId Dso::GetExpectedBuildId(const std::string& filename) {
-  auto it = build_id_map_.find(filename);
+BuildId Dso::GetExpectedBuildId() {
+  auto it = build_id_map_.find(path_);
   if (it != build_id_map_.end()) {
     return it->second;
   }
@@ -138,9 +138,22 @@
     : type_(type),
       id_(id),
       path_(path),
+      debug_file_path_(path),
       min_vaddr_(std::numeric_limits<uint64_t>::max()),
       is_loaded_(false),
       has_dumped_(false) {
+  // Check if file matching path_ exists in symfs directory before using it as
+  // debug_file_path_.
+  if (!symfs_dir_.empty()) {
+    std::string path_in_symfs = symfs_dir_ + path_;
+    std::tuple<bool, std::string, std::string> tuple =
+        SplitUrlInApk(path_in_symfs);
+    std::string file_path =
+        std::get<0>(tuple) ? std::get<1>(tuple) : path_in_symfs;
+    if (IsRegularFile(file_path)) {
+      debug_file_path_ = path_in_symfs;
+    }
+  }
   dso_count_++;
 }
 
@@ -156,8 +169,6 @@
   }
 }
 
-std::string Dso::GetAccessiblePath() const { return symfs_dir_ + path_; }
-
 const Symbol* Dso::FindSymbol(uint64_t vaddr_in_dso) {
   if (!is_loaded_) {
     is_loaded_ = true;
@@ -188,10 +199,10 @@
   if (min_vaddr_ == std::numeric_limits<uint64_t>::max()) {
     min_vaddr_ = 0;
     if (type_ == DSO_ELF_FILE) {
-      BuildId build_id = GetExpectedBuildId(GetAccessiblePath());
+      BuildId build_id = GetExpectedBuildId();
 
       uint64_t addr;
-      if (ReadMinExecutableVirtualAddressFromElfFile(GetAccessiblePath(),
+      if (ReadMinExecutableVirtualAddressFromElfFile(GetDebugFilePath(),
                                                      build_id, &addr)) {
         min_vaddr_ = addr;
       }
@@ -229,14 +240,14 @@
           symbol.type == 'w');
 }
 
-bool Dso::KernelSymbolCallback(const KernelSymbol& kernel_symbol, Dso* dso) {
+static bool KernelSymbolCallback(const KernelSymbol& kernel_symbol, Dso* dso) {
   if (IsKernelFunctionSymbol(kernel_symbol)) {
     dso->InsertSymbol(Symbol(kernel_symbol.name, kernel_symbol.addr, 0));
   }
   return false;
 }
 
-void Dso::VmlinuxSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso) {
+static void VmlinuxSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso) {
   if (elf_symbol.is_func) {
     dso->InsertSymbol(
         Symbol(elf_symbol.name, elf_symbol.vaddr, elf_symbol.len));
@@ -244,7 +255,7 @@
 }
 
 bool Dso::LoadKernel() {
-  BuildId build_id = GetExpectedBuildId(DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID);
+  BuildId build_id = GetExpectedBuildId();
   if (!vmlinux_.empty()) {
     ParseSymbolsFromElfFile(
         vmlinux_, build_id,
@@ -271,9 +282,10 @@
       BuildId real_build_id;
       GetKernelBuildId(&real_build_id);
       bool match = (build_id == real_build_id);
-      LOG(DEBUG) << "check kernel build id (" << (match ? "match" : "mismatch")
-                 << "): expected " << build_id.ToString() << ", real "
-                 << real_build_id.ToString();
+      LOG(WARNING) << "check kernel build id ("
+                   << (match ? "match" : "mismatch") << "): expected "
+                   << build_id.ToString() << ", real "
+                   << real_build_id.ToString();
       if (!match) {
         return false;
       }
@@ -303,8 +315,8 @@
   return true;
 }
 
-void Dso::ElfFileSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso,
-                                bool (*filter)(const ElfFileSymbol&)) {
+static void ElfFileSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso,
+                                  bool (*filter)(const ElfFileSymbol&)) {
   if (filter(elf_symbol)) {
     dso->InsertSymbol(
         Symbol(elf_symbol.name, elf_symbol.vaddr, elf_symbol.len));
@@ -317,7 +329,7 @@
 }
 
 bool Dso::LoadKernelModule() {
-  BuildId build_id = GetExpectedBuildId(path_);
+  BuildId build_id = GetExpectedBuildId();
   ParseSymbolsFromElfFile(
       symfs_dir_ + path_, build_id,
       std::bind(ElfFileSymbolCallback, std::placeholders::_1, this,
@@ -332,7 +344,7 @@
 
 bool Dso::LoadElfFile() {
   bool loaded = false;
-  BuildId build_id = GetExpectedBuildId(GetAccessiblePath());
+  BuildId build_id = GetExpectedBuildId();
 
   if (symfs_dir_.empty()) {
     // Linux host can store debug shared libraries in /usr/lib/debug.
@@ -343,7 +355,7 @@
   }
   if (!loaded) {
     loaded = ParseSymbolsFromElfFile(
-        GetAccessiblePath(), build_id,
+        GetDebugFilePath(), build_id,
         std::bind(ElfFileSymbolCallback, std::placeholders::_1, this,
                   SymbolFilterForDso));
   }
@@ -351,8 +363,8 @@
 }
 
 bool Dso::LoadEmbeddedElfFile() {
-  std::string path = GetAccessiblePath();
-  BuildId build_id = GetExpectedBuildId(path);
+  std::string path = GetDebugFilePath();
+  BuildId build_id = GetExpectedBuildId();
   auto tuple = SplitUrlInApk(path);
   CHECK(std::get<0>(tuple));
   return ParseSymbolsFromApkFile(
diff --git a/simpleperf/dso.h b/simpleperf/dso.h
index e823aca..5f3915e 100644
--- a/simpleperf/dso.h
+++ b/simpleperf/dso.h
@@ -84,15 +84,13 @@
 
   // Return the path recorded in perf.data.
   const std::string& Path() const { return path_; }
+  // Return the path containing symbol table and debug information.
+  const std::string& GetDebugFilePath() const { return debug_file_path_; }
 
   bool HasDumped() const { return has_dumped_; }
 
   void SetDumped() { has_dumped_ = true; }
 
-  // Return the accessible path. It may be the same as Path(), or
-  // return the path with prefix set by SetSymFsDir().
-  std::string GetAccessiblePath() const;
-
   // Return the minimum virtual address in program header.
   uint64_t MinVirtualAddress();
 
@@ -100,12 +98,6 @@
   void InsertSymbol(const Symbol& symbol);
 
  private:
-  static BuildId GetExpectedBuildId(const std::string& filename);
-  static bool KernelSymbolCallback(const KernelSymbol& kernel_symbol, Dso* dso);
-  static void VmlinuxSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso);
-  static void ElfFileSymbolCallback(const ElfFileSymbol& elf_symbol, Dso* dso,
-                                    bool (*filter)(const ElfFileSymbol&));
-
   static bool demangle_;
   static std::string symfs_dir_;
   static std::string vmlinux_;
@@ -120,10 +112,15 @@
   bool LoadElfFile();
   bool LoadEmbeddedElfFile();
   void FixupSymbolLength();
+  BuildId GetExpectedBuildId();
 
   const DsoType type_;
   const uint64_t id_;
+  // path of the shared library used by the profiled program
   const std::string path_;
+  // path of the shared library having symbol table and debug information
+  // It is the same as path_, or has the same build id as path_.
+  std::string debug_file_path_;
   uint64_t min_vaddr_;
   std::set<Symbol, SymbolComparator> symbols_;
   bool is_loaded_;
diff --git a/simpleperf/dwarf_unwind.cpp b/simpleperf/dwarf_unwind.cpp
index ff3c1fe..040ff52 100644
--- a/simpleperf/dwarf_unwind.cpp
+++ b/simpleperf/dwarf_unwind.cpp
@@ -117,7 +117,7 @@
     bt_map.start = map->start_addr;
     bt_map.end = map->start_addr + map->len;
     bt_map.offset = map->pgoff;
-    bt_map.name = map->dso->GetAccessiblePath();
+    bt_map.name = map->dso->GetDebugFilePath();
   }
   std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(thread.pid, bt_maps));
 
diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h
index 36b6ed1..1040448 100644
--- a/simpleperf/get_test_data.h
+++ b/simpleperf/get_test_data.h
@@ -77,4 +77,11 @@
 // perf_kmem_slab_callgraph.data is generated by `simpleperf kmem record --slab --call-graph fp -f 100 sleep 0.0001`.
 static const std::string PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD = "perf_with_kmem_slab_callgraph.data";
 
+
+// perf_for_build_id_check.data is generated by recording a process running
+// testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check.
+static const std::string PERF_DATA_FOR_BUILD_ID_CHECK = "perf_for_build_id_check.data";
+static const std::string CORRECT_SYMFS_FOR_BUILD_ID_CHECK = "data/correct_symfs_for_build_id_check";
+static const std::string WRONG_SYMFS_FOR_BUILD_ID_CHECK = "data/wrong_symfs_for_build_id_check";
+
 #endif  // SIMPLE_PERF_GET_TEST_DATA_H_
diff --git a/simpleperf/read_elf.cpp b/simpleperf/read_elf.cpp
index 51b85b6..751c627 100644
--- a/simpleperf/read_elf.cpp
+++ b/simpleperf/read_elf.cpp
@@ -372,11 +372,13 @@
     return false;
   }
   if (expected_build_id != real_build_id) {
-    LOG(DEBUG) << "build id for " << debug_filename << " mismatch: "
+    LOG(WARNING) << "build id for " << debug_filename << " mismatch: "
                << "expected " << expected_build_id.ToString()
                << ", real " << real_build_id.ToString();
     return false;
   }
+  LOG(VERBOSE) << "build id for " << debug_filename << " match: "
+               << "expected " << expected_build_id.ToString();
   return true;
 }
 
diff --git a/simpleperf/testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check b/simpleperf/testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check
new file mode 100755
index 0000000..5c1a9dd
--- /dev/null
+++ b/simpleperf/testdata/data/correct_symfs_for_build_id_check/elf_for_build_id_check
Binary files differ
diff --git a/simpleperf/testdata/data/wrong_symfs_for_build_id_check/elf_for_build_id_check b/simpleperf/testdata/data/wrong_symfs_for_build_id_check/elf_for_build_id_check
new file mode 100755
index 0000000..0489a22
--- /dev/null
+++ b/simpleperf/testdata/data/wrong_symfs_for_build_id_check/elf_for_build_id_check
Binary files differ
diff --git a/simpleperf/testdata/perf_for_build_id_check.data b/simpleperf/testdata/perf_for_build_id_check.data
new file mode 100644
index 0000000..1012d4b
--- /dev/null
+++ b/simpleperf/testdata/perf_for_build_id_check.data
Binary files differ