Simpleperf: check build id in report command.

1. refactor BuildId type.
2. check build id before parsing symbols in report command.

Bug: 22179177

Change-Id: Iefc797a88d4a168e109db786105120c8d6914369
diff --git a/simpleperf/build_id.h b/simpleperf/build_id.h
index 5a4b12c..8bc0798 100644
--- a/simpleperf/build_id.h
+++ b/simpleperf/build_id.h
@@ -17,10 +17,44 @@
 #ifndef SIMPLE_PERF_BUILD_ID_H_
 #define SIMPLE_PERF_BUILD_ID_H_
 
-#include <array>
+#include <string.h>
+#include <algorithm>
+#include <base/stringprintf.h>
 
-static constexpr int BUILD_ID_SIZE = 20;
+constexpr size_t BUILD_ID_SIZE = 20;
 
-typedef std::array<unsigned char, BUILD_ID_SIZE> BuildId;
+class BuildId {  // Fixed-size build id value type holding exactly BUILD_ID_SIZE bytes.
+ public:
+  static size_t Size() {  // Number of bytes stored in every BuildId.
+    return BUILD_ID_SIZE;
+  }
+
+  BuildId() {  // A default-constructed build id is all zero bytes.
+    memset(data_, '\0', BUILD_ID_SIZE);
+  }
+
+  BuildId(const void* data, size_t len = BUILD_ID_SIZE) : BuildId() {  // Copies at most BUILD_ID_SIZE bytes from data.
+    memcpy(data_, data, std::min(len, BUILD_ID_SIZE));  // Shorter input leaves the zeroed tail in place.
+  }
+
+  const unsigned char* Data() const {  // Pointer to the BUILD_ID_SIZE raw bytes.
+    return data_;
+  }
+
+  std::string ToString() const {  // Returns "0x" followed by two lowercase hex digits per byte.
+    std::string s = "0x";
+    for (size_t i = 0; i < BUILD_ID_SIZE; ++i) {
+      s += android::base::StringPrintf("%02x", data_[i]);  // The "0x" prefix is emitted once above, not per byte.
+    }
+    return s;
+  }
+
+  bool operator==(const BuildId& build_id) const {  // Byte-wise equality over all BUILD_ID_SIZE bytes.
+    return memcmp(data_, build_id.data_, BUILD_ID_SIZE) == 0;
+  }
+
+ private:
+  unsigned char data_[BUILD_ID_SIZE];
+};
 
 #endif  // SIMPLE_PERF_BUILD_ID_H_
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp
index 28175d9..ac8d6b7 100644
--- a/simpleperf/cmd_dumprecord.cpp
+++ b/simpleperf/cmd_dumprecord.cpp
@@ -179,15 +179,9 @@
     printf("feature section for %s: offset %" PRId64 ", size %" PRId64 "\n",
            GetFeatureName(feature).c_str(), section.offset, section.size);
     if (feature == FEAT_BUILD_ID) {
-      const char* p = record_file_reader_->DataAtOffset(section.offset);
-      const char* end = p + section.size;
-      while (p < end) {
-        const perf_event_header* header = reinterpret_cast<const perf_event_header*>(p);
-        CHECK_LE(p + header->size, end);
-        BuildIdRecord record(header);
-        record.header.type = PERF_RECORD_BUILD_ID;  // Set type explicitly as perf doesn't set it.
-        record.Dump(1);
-        p += header->size;
+      std::vector<BuildIdRecord> records = record_file_reader_->ReadBuildIdFeature();
+      for (auto& r : records) {
+        r.Dump(1);
       }
     } else if (feature == FEAT_CMDLINE) {
       std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
index 87479bd..5d17715 100644
--- a/simpleperf/cmd_report.cpp
+++ b/simpleperf/cmd_report.cpp
@@ -296,8 +296,9 @@
   if (!ReadEventAttrFromRecordFile()) {
     return false;
   }
-  ReadSampleTreeFromRecordFile();
+  // Read features first to prepare build ids used when building SampleTree.
   ReadFeaturesFromRecordFile();
+  ReadSampleTreeFromRecordFile();
 
   // 3. Show collected information.
   PrintReport();
@@ -306,6 +307,8 @@
 }
 
 bool ReportCommand::ParseOptions(const std::vector<std::string>& args) {
+  bool demangle = true;
+  std::string symfs_dir;
   bool print_sample_count = false;
   std::vector<std::string> sort_keys = {"comm", "pid", "tid", "dso", "symbol"};
   for (size_t i = 0; i < args.size(); ++i) {
@@ -323,7 +326,7 @@
       print_sample_count = true;
 
     } else if (args[i] == "--no-demangle") {
-      DsoFactory::SetDemangle(false);
+      demangle = false;
 
     } else if (args[i] == "--sort") {
       if (!NextArgumentOrError(args, &i)) {
@@ -334,15 +337,18 @@
       if (!NextArgumentOrError(args, &i)) {
         return false;
       }
-      if (!DsoFactory::SetSymFsDir(args[i])) {
-        return false;
-      }
+      symfs_dir = args[i];
     } else {
       ReportUnknownOption(args, i);
       return false;
     }
   }
 
+  DsoFactory::SetDemangle(demangle);
+  if (!DsoFactory::SetSymFsDir(symfs_dir)) {
+    return false;
+  }
+
   if (!accumulate_callchain_) {
     displayable_items_.push_back(
         std::unique_ptr<Displayable>(new SelfOverheadItem(*sample_tree_, "Overhead")));
@@ -495,6 +501,12 @@
 }
 
 void ReportCommand::ReadFeaturesFromRecordFile() {
+  std::vector<BuildIdRecord> records = record_file_reader_->ReadBuildIdFeature();
+  std::vector<std::pair<std::string, BuildId>> build_ids;
+  for (auto& r : records) {
+    build_ids.push_back(std::make_pair(r.filename, r.build_id));
+  }
+  DsoFactory::SetBuildIds(build_ids);
   std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
   if (!cmdline.empty()) {
     record_cmdline_ = android::base::Join(cmdline, ' ');
diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp
index 562727b..c75537c 100644
--- a/simpleperf/dso.cpp
+++ b/simpleperf/dso.cpp
@@ -54,20 +54,33 @@
 
 bool DsoFactory::SetSymFsDir(const std::string& symfs_dir) {
   std::string dirname = symfs_dir;
-  if (!dirname.empty() && dirname.back() != '/') {
-    dirname.push_back('/');
-  }
-  std::vector<std::string> files;
-  std::vector<std::string> subdirs;
-  GetEntriesInDir(symfs_dir, &files, &subdirs);
-  if (files.empty() && subdirs.empty()) {
-    LOG(ERROR) << "Invalid symfs_dir '" << symfs_dir << "'";
-    return false;
+  if (!dirname.empty()) {
+    if (dirname.back() != '/') {
+      dirname.push_back('/');
+    }
+    std::vector<std::string> files;
+    std::vector<std::string> subdirs;
+    GetEntriesInDir(symfs_dir, &files, &subdirs);
+    if (files.empty() && subdirs.empty()) {
+      LOG(ERROR) << "Invalid symfs_dir '" << symfs_dir << "'";
+      return false;
+    }
   }
   DsoFactory::symfs_dir = dirname;
   return true;
 }
 
+std::unordered_map<std::string, BuildId> DsoFactory::build_id_map;
+
+void DsoFactory::SetBuildIds(const std::vector<std::pair<std::string, BuildId>>& build_ids) {  // Replaces build_id_map with the given (filename, build id) pairs.
+  std::unordered_map<std::string, BuildId> map;
+  for (auto& pair : build_ids) {
+    LOG(DEBUG) << "build_id_map: " << pair.first << ", " << pair.second.ToString();
+    map.insert(pair);  // insert() keeps the first entry when a filename appears more than once.
+  }
+  build_id_map = std::move(map);  // Swap in the new table; the previous contents are dropped.
+}
+
 static bool IsKernelFunctionSymbol(const KernelSymbol& symbol) {
   return (symbol.type == 'T' || symbol.type == 't' || symbol.type == 'W' || symbol.type == 'w');
 }
@@ -101,9 +114,16 @@
 std::unique_ptr<DsoEntry> DsoFactory::LoadKernel() {
   std::unique_ptr<DsoEntry> dso(new DsoEntry);
   dso->path = "[kernel.kallsyms]";
-
-  ProcessKernelSymbols("/proc/kallsyms",
-                       std::bind(&KernelSymbolCallback, std::placeholders::_1, dso.get()));
+  BuildId build_id = GetExpectedBuildId(DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID);
+  BuildId real_build_id;
+  GetKernelBuildId(&real_build_id);
+  bool match = (build_id == real_build_id);
+  LOG(DEBUG) << "check kernel build id (" << (match ? "match" : "mismatch") << "): expected "
+             << build_id.ToString() << ", real " << real_build_id.ToString();
+  if (match) {
+    ProcessKernelSymbols("/proc/kallsyms",
+                         std::bind(&KernelSymbolCallback, std::placeholders::_1, dso.get()));
+  }
   FixupSymbolLength(dso.get());
   return dso;
 }
@@ -128,8 +148,10 @@
 std::unique_ptr<DsoEntry> DsoFactory::LoadKernelModule(const std::string& dso_path) {
   std::unique_ptr<DsoEntry> dso(new DsoEntry);
   dso->path = dso_path;
-  ParseSymbolsFromElfFile(symfs_dir + dso_path, std::bind(ParseSymbolCallback, std::placeholders::_1,
-                                                          dso.get(), SymbolFilterForKernelModule));
+  BuildId build_id = GetExpectedBuildId(dso_path);
+  ParseSymbolsFromElfFile(symfs_dir + dso_path, build_id,
+                          std::bind(ParseSymbolCallback, std::placeholders::_1, dso.get(),
+                                    SymbolFilterForKernelModule));
   FixupSymbolLength(dso.get());
   return dso;
 }
@@ -164,8 +186,10 @@
 std::unique_ptr<DsoEntry> DsoFactory::LoadDso(const std::string& dso_path) {
   std::unique_ptr<DsoEntry> dso(new DsoEntry);
   dso->path = dso_path;
-  ParseSymbolsFromElfFile(symfs_dir + dso_path, std::bind(ParseSymbolCallback, std::placeholders::_1,
-                                                          dso.get(), SymbolFilterForDso));
+  BuildId build_id = GetExpectedBuildId(dso_path);
+  ParseSymbolsFromElfFile(
+      symfs_dir + dso_path, build_id,
+      std::bind(ParseSymbolCallback, std::placeholders::_1, dso.get(), SymbolFilterForDso));
   if (demangle) {
     for (auto& symbol : dso->symbols) {
       DemangleInPlace(&symbol->name);
@@ -174,3 +198,11 @@
   FixupSymbolLength(dso.get());
   return dso;
 }
+
+BuildId DsoFactory::GetExpectedBuildId(const std::string& filename) {  // Looks up filename in build_id_map.
+  auto it = build_id_map.find(filename);
+  if (it != build_id_map.end()) {
+    return it->second;
+  }
+  return BuildId();  // Not found: fall back to a default-constructed BuildId.
+}
diff --git a/simpleperf/dso.h b/simpleperf/dso.h
index 2d79c92..ea50a6d 100644
--- a/simpleperf/dso.h
+++ b/simpleperf/dso.h
@@ -20,6 +20,10 @@
 #include <memory>
 #include <set>
 #include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "build_id.h"
 
 struct SymbolEntry {
   std::string name;
@@ -43,13 +47,17 @@
  public:
   static void SetDemangle(bool demangle);
   static bool SetSymFsDir(const std::string& symfs_dir);
+  static void SetBuildIds(const std::vector<std::pair<std::string, BuildId>>& build_ids);
   static std::unique_ptr<DsoEntry> LoadKernel();
   static std::unique_ptr<DsoEntry> LoadKernelModule(const std::string& dso_path);
   static std::unique_ptr<DsoEntry> LoadDso(const std::string& dso_path);
 
  private:
+  static BuildId GetExpectedBuildId(const std::string& filename);
+
   static bool demangle;
   static std::string symfs_dir;
+  static std::unordered_map<std::string, BuildId> build_id_map;
 };
 
 #endif  // SIMPLE_PERF_DSO_H_
diff --git a/simpleperf/environment_fake.cpp b/simpleperf/environment_fake.cpp
index e8c9dd8..015c72a 100644
--- a/simpleperf/environment_fake.cpp
+++ b/simpleperf/environment_fake.cpp
@@ -20,3 +20,7 @@
 bool ProcessKernelSymbols(const std::string&, std::function<bool(const KernelSymbol&)>) {
   return false;
 }
+
+bool GetKernelBuildId(BuildId* build_id) {  // Fake implementation: never writes to *build_id.
+  return false;  // Always reports failure.
+}
diff --git a/simpleperf/read_elf.cpp b/simpleperf/read_elf.cpp
index 4d41165..f703795 100644
--- a/simpleperf/read_elf.cpp
+++ b/simpleperf/read_elf.cpp
@@ -52,8 +52,7 @@
     descsz = ALIGN(descsz, 4);
     CHECK_LE(p + namesz + descsz, end);
     if ((type == NT_GNU_BUILD_ID) && (strcmp(p, ELF_NOTE_GNU) == 0)) {
-      std::fill(build_id->begin(), build_id->end(), 0);
-      memcpy(build_id->data(), p + namesz, std::min(build_id->size(), descsz));
+      *build_id = BuildId(p + namesz, descsz);
       return true;
     }
     p += namesz + descsz;
@@ -93,27 +92,35 @@
   return false;
 }
 
+static bool GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {  // Fetches the build id of obj via GetBuildIdFromELFFile.
+  bool result = false;
+  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {  // 32-bit little-endian ELF.
+    result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
+  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {  // 64-bit little-endian ELF.
+    result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
+  } else {
+    LOG(ERROR) << "unknown elf format in file " << obj->getFileName().data();
+    return false;  // Neither handled ELF type matched; *build_id is not written here.
+  }
+  if (!result) {
+    LOG(DEBUG) << "no build id present in file " << obj->getFileName().data();
+  }
+  return result;  // True only when GetBuildIdFromELFFile reported success.
+}
+
 bool GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
   auto owning_binary = llvm::object::createBinary(llvm::StringRef(filename));
   if (owning_binary.getError()) {
     PLOG(DEBUG) << "can't open file " << filename;
     return false;
   }
-  bool result = false;
   llvm::object::Binary* binary = owning_binary.get().getBinary();
-  if (auto obj = llvm::dyn_cast<llvm::object::ObjectFile>(binary)) {
-    if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
-      result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
-    } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
-      result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
-    } else {
-      PLOG(DEBUG) << "unknown elf format in file " << filename;
-    }
+  auto obj = llvm::dyn_cast<llvm::object::ObjectFile>(binary);
+  if (obj == nullptr) {
+    LOG(DEBUG) << filename << " is not an object file";
+    return false;
   }
-  if (!result) {
-    PLOG(DEBUG) << "can't read build_id from file " << filename;
-  }
-  return result;
+  return GetBuildIdFromObjectFile(obj, build_id);
 }
 
 bool IsArmMappingSymbol(const char* name) {
@@ -124,7 +131,7 @@
 }
 
 template <class ELFT>
-bool ParseSymbolsFromELFFile(const llvm::object::ELFFile<ELFT>* elf,
+void ParseSymbolsFromELFFile(const llvm::object::ELFFile<ELFT>* elf,
                              std::function<void(const ElfFileSymbol&)> callback) {
   bool is_arm = (elf->getHeader()->e_machine == llvm::ELF::EM_ARM ||
                  elf->getHeader()->e_machine == llvm::ELF::EM_AARCH64);
@@ -187,29 +194,38 @@
 
     callback(symbol);
   }
-  return true;
 }
 
-bool ParseSymbolsFromElfFile(const std::string& filename,
+bool ParseSymbolsFromElfFile(const std::string& filename, const BuildId& expected_build_id,
                              std::function<void(const ElfFileSymbol&)> callback) {
   auto owning_binary = llvm::object::createBinary(llvm::StringRef(filename));
   if (owning_binary.getError()) {
     PLOG(DEBUG) << "can't open file '" << filename << "'";
     return false;
   }
-  bool result = false;
   llvm::object::Binary* binary = owning_binary.get().getBinary();
-  if (auto obj = llvm::dyn_cast<llvm::object::ObjectFile>(binary)) {
-    if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
-      result = ParseSymbolsFromELFFile(elf->getELFFile(), callback);
-    } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
-      result = ParseSymbolsFromELFFile(elf->getELFFile(), callback);
-    } else {
-      PLOG(DEBUG) << "unknown elf format in file" << filename;
-    }
+  auto obj = llvm::dyn_cast<llvm::object::ObjectFile>(binary);
+  if (obj == nullptr) {
+    LOG(DEBUG) << filename << " is not an object file";
+    return false;
   }
+  BuildId real_build_id;
+  GetBuildIdFromObjectFile(obj, &real_build_id);
+  bool result = (expected_build_id == real_build_id);
+  LOG(DEBUG) << "check build id for \"" << filename << "\" (" << (result ? "match" : "mismatch")
+             << "): expected " << expected_build_id.ToString() << ", real "
+             << real_build_id.ToString();
   if (!result) {
-    PLOG(DEBUG) << "can't parse symbols from file " << filename;
+    return result;
   }
-  return result;
+
+  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
+    ParseSymbolsFromELFFile(elf->getELFFile(), callback);
+  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
+    ParseSymbolsFromELFFile(elf->getELFFile(), callback);
+  } else {
+    LOG(ERROR) << "unknown elf format in file" << filename;
+    return false;
+  }
+  return true;
 }
diff --git a/simpleperf/read_elf.h b/simpleperf/read_elf.h
index 96eb2f3..c01bda7 100644
--- a/simpleperf/read_elf.h
+++ b/simpleperf/read_elf.h
@@ -36,7 +36,7 @@
   std::string name;
 };
 
-bool ParseSymbolsFromElfFile(const std::string& filename,
+bool ParseSymbolsFromElfFile(const std::string& filename, const BuildId& expected_build_id,
                              std::function<void(const ElfFileSymbol&)> callback);
 
 // Expose the following functions for unit tests.
diff --git a/simpleperf/read_elf_test.cpp b/simpleperf/read_elf_test.cpp
index c0ff660..924af97 100644
--- a/simpleperf/read_elf_test.cpp
+++ b/simpleperf/read_elf_test.cpp
@@ -31,9 +31,11 @@
   ASSERT_LT(static_cast<size_t>(elf_file_len), sizeof(elf_file));
   elf_file[elf_file_len] = '\0';
 
+  BuildId build_id;
+  GetBuildIdFromElfFile(elf_file, &build_id);
   bool result = false;
-  ASSERT_TRUE(
-      ParseSymbolsFromElfFile(elf_file, std::bind(ParseSymbol, std::placeholders::_1, &result)));
+  ASSERT_TRUE(ParseSymbolsFromElfFile(elf_file, build_id,
+                                      std::bind(ParseSymbol, std::placeholders::_1, &result)));
   ASSERT_TRUE(result);
 }
 
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index f6b2560..578fc13 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -359,8 +359,8 @@
   const char* p = reinterpret_cast<const char*>(pheader + 1);
   const char* end = reinterpret_cast<const char*>(pheader) + pheader->size;
   MoveFromBinaryFormat(pid, p);
-  std::copy_n(p, build_id.size(), build_id.begin());
-  p += ALIGN(build_id.size(), 8);
+  build_id = BuildId(p);
+  p += ALIGN(build_id.Size(), 8);
   filename = p;
   p += ALIGN(filename.size() + 1, 64);
   CHECK_EQ(p, end);
@@ -368,11 +368,7 @@
 
 void BuildIdRecord::DumpData(size_t indent) const {
   PrintIndented(indent, "pid %u\n", pid);
-  PrintIndented(indent, "build_id 0x");
-  for (auto& c : build_id) {
-    printf("%02x", c);
-  }
-  printf("\n");
+  PrintIndented(indent, "build_id %s\n", build_id.ToString().c_str());
   PrintIndented(indent, "filename %s\n", filename.c_str());
 }
 
@@ -381,8 +377,8 @@
   char* p = buf.data();
   MoveToBinaryFormat(header, p);
   MoveToBinaryFormat(pid, p);
-  memcpy(p, build_id.data(), build_id.size());
-  p += ALIGN(build_id.size(), 8);
+  memcpy(p, build_id.Data(), build_id.Size());
+  p += ALIGN(build_id.Size(), 8);
   strcpy(p, filename.c_str());
   p += ALIGN(filename.size() + 1, 64);
   return buf;
@@ -464,6 +460,6 @@
   record.build_id = build_id;
   record.filename = filename;
   record.header.size = sizeof(record.header) + sizeof(record.pid) +
-                       ALIGN(record.build_id.size(), 8) + ALIGN(filename.size() + 1, 64);
+                       ALIGN(record.build_id.Size(), 8) + ALIGN(filename.size() + 1, 64);
   return record;
 }
diff --git a/simpleperf/record_file.h b/simpleperf/record_file.h
index d8b4413..347f017 100644
--- a/simpleperf/record_file.h
+++ b/simpleperf/record_file.h
@@ -106,6 +106,7 @@
     return mmap_addr_ + offset;
   }
   std::vector<std::string> ReadCmdlineFeature();
+  std::vector<BuildIdRecord> ReadBuildIdFeature();
   bool Close();
 
  private:
diff --git a/simpleperf/record_file_reader.cpp b/simpleperf/record_file_reader.cpp
index 8407d32..b78af51 100644
--- a/simpleperf/record_file_reader.cpp
+++ b/simpleperf/record_file_reader.cpp
@@ -196,3 +196,25 @@
   }
   return cmdline;
 }
+
+std::vector<BuildIdRecord> RecordFileReader::ReadBuildIdFeature() {  // Returns every record found in the FEAT_BUILD_ID section.
+  const std::map<int, SectionDesc>& section_map = FeatureSectionDescriptors();
+  auto it = section_map.find(FEAT_BUILD_ID);
+  if (it == section_map.end()) {
+    return std::vector<BuildIdRecord>();  // No build id feature section: empty result.
+  }
+  SectionDesc section = it->second;
+  const char* p = DataAtOffset(section.offset);
+  const char* end = DataAtOffset(section.offset + section.size);
+  std::vector<BuildIdRecord> result;
+  while (p < end) {  // Walk the section record by record; header->size is each record's length.
+    const perf_event_header* header = reinterpret_cast<const perf_event_header*>(p);
+    CHECK_LE(p + header->size, end);  // A record must not run past the end of the section.
+    BuildIdRecord record(header);
+    // Set type explicitly as the perf.data produced by perf doesn't set it.
+    record.header.type = PERF_RECORD_BUILD_ID;
+    result.push_back(record);
+    p += header->size;
+  }
+  return result;
+}
diff --git a/simpleperf/record_file_test.cpp b/simpleperf/record_file_test.cpp
index 6e6bc13..35a66d6 100644
--- a/simpleperf/record_file_test.cpp
+++ b/simpleperf/record_file_test.cpp
@@ -60,10 +60,11 @@
 
   // Write feature section.
   ASSERT_TRUE(writer->WriteFeatureHeader(1));
-  BuildId build_id;
-  for (size_t i = 0; i < build_id.size(); ++i) {
-    build_id[i] = i;
+  char p[BuildId::Size()];
+  for (size_t i = 0; i < BuildId::Size(); ++i) {
+    p[i] = i;
   }
+  BuildId build_id(p);
   BuildIdRecord build_id_record = CreateBuildIdRecord(false, getpid(), build_id, "init");
   ASSERT_TRUE(writer->WriteBuildIdFeature({build_id_record}));
   ASSERT_TRUE(writer->Close());