Simpleperf: use ThreadTree when getting hit files in record command.

The new method is more accurate and has lower time complexity.

Bug: 22229391
Change-Id: I8b3016798b8a0e20335adeb7ec5dda0068044142
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp
index 01b0cea..8f73732 100644
--- a/simpleperf/cmd_dumprecord.cpp
+++ b/simpleperf/cmd_dumprecord.cpp
@@ -172,7 +172,7 @@
 }
 
 void DumpRecordCommand::DumpDataSection() {
-  std::vector<std::unique_ptr<const Record>> records = record_file_reader_->DataSection();
+  std::vector<std::unique_ptr<Record>> records = record_file_reader_->DataSection();
   for (auto& record : records) {
     record->Dump();
   }
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index dd77ec3..e4f1d41 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -34,6 +34,7 @@
 #include "read_elf.h"
 #include "record.h"
 #include "record_file.h"
+#include "thread_tree.h"
 #include "utils.h"
 #include "workload.h"
 
@@ -122,6 +123,7 @@
   bool DumpThreadCommAndMmaps(bool all_threads, const std::vector<pid_t>& selected_threads);
   bool DumpAdditionalFeatures(const std::vector<std::string>& args);
   bool DumpBuildIdFeature();
+  bool GetHitFiles(std::set<std::string>* kernel_modules, std::set<std::string>* user_files);
 
   bool use_sample_freq_;    // Use sample_freq_ when true, otherwise using sample_period_.
   uint64_t sample_freq_;    // Sample 'sample_freq_' times per second.
@@ -538,15 +540,15 @@
 }
 
 bool RecordCommand::DumpBuildIdFeature() {
-  std::vector<std::string> hit_kernel_modules;
-  std::vector<std::string> hit_user_files;
-  if (!record_file_writer_->GetHitModules(&hit_kernel_modules, &hit_user_files)) {
+  std::set<std::string> kernel_modules;
+  std::set<std::string> user_files;
+  if (!GetHitFiles(&kernel_modules, &user_files)) {
     return false;
   }
   std::vector<BuildIdRecord> build_id_records;
   BuildId build_id;
   // Add build_ids for kernel/modules.
-  for (auto& filename : hit_kernel_modules) {
+  for (auto& filename : kernel_modules) {
     if (filename == DEFAULT_KERNEL_MMAP_NAME) {
       if (!GetKernelBuildId(&build_id)) {
         LOG(DEBUG) << "can't read build_id for kernel";
@@ -555,7 +557,8 @@
       build_id_records.push_back(
           CreateBuildIdRecord(true, UINT_MAX, build_id, DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID));
     } else {
-      std::string module_name = basename(&filename[0]);
+      std::string path = filename;
+      std::string module_name = basename(&path[0]);
       if (android::base::EndsWith(module_name, ".ko")) {
         module_name = module_name.substr(0, module_name.size() - 3);
       }
@@ -567,7 +570,7 @@
     }
   }
   // Add build_ids for user elf files.
-  for (auto& filename : hit_user_files) {
+  for (auto& filename : user_files) {
     if (filename == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
       continue;
     }
@@ -583,6 +586,30 @@
   return true;
 }
 
+// Collects the paths of the kernel modules and user files that samples in the written data
+// section landed in, resolving each sample's ip through a ThreadTree built from the records.
+bool RecordCommand::GetHitFiles(std::set<std::string>* kernel_modules,
+                                std::set<std::string>* user_files) {
+  std::vector<std::unique_ptr<Record>> records;
+  if (!record_file_writer_->ReadDataSection(&records)) {
+    return false;
+  }
+  ThreadTree thread_tree;
+  BuildThreadTree(records, &thread_tree);
+  for (const auto& record : records) {
+    if (record->header.type != PERF_RECORD_SAMPLE) {
+      continue;
+    }
+    // Bind by reference: copying a SampleRecord for every sample is needless work.
+    const SampleRecord& r = *static_cast<const SampleRecord*>(record.get());
+    bool in_kernel = ((r.header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_KERNEL);
+    const ThreadEntry* thread = thread_tree.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
+    const MapEntry* map = thread_tree.FindMap(thread, r.ip_data.ip, in_kernel);
+    (in_kernel ? kernel_modules : user_files)->insert(map->dso->path);
+  }
+  return true;
+}
+
 __attribute__((constructor)) static void RegisterRecordCommand() {
   RegisterCommand("record", [] { return std::unique_ptr<Command>(new RecordCommand()); });
 }
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 80d149d..b9c3b9f 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -60,7 +60,7 @@
   ASSERT_TRUE(RecordCmd()->Run({"sleep", "1"}));
   std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance("perf.data");
   ASSERT_TRUE(reader != nullptr);
-  std::vector<std::unique_ptr<const Record>> records = reader->DataSection();
+  std::vector<std::unique_ptr<Record>> records = reader->DataSection();
   ASSERT_GT(records.size(), 0U);
   bool have_kernel_mmap = false;
   for (auto& record : records) {
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
index ca9e5aa..c92aa65 100644
--- a/simpleperf/cmd_report.cpp
+++ b/simpleperf/cmd_report.cpp
@@ -448,38 +448,12 @@
 }
 
 void ReportCommand::ReadSampleTreeFromRecordFile() {
+  std::vector<std::unique_ptr<Record>> records = record_file_reader_->DataSection();
   thread_tree_.AddThread(0, 0, "swapper");
-
-  std::vector<std::unique_ptr<const Record>> records = record_file_reader_->DataSection();
+  BuildThreadTree(records, &thread_tree_);
   for (auto& record : records) {
-    if (record->header.type == PERF_RECORD_MMAP) {
-      const MmapRecord& r = *static_cast<const MmapRecord*>(record.get());
-      if ((r.header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_KERNEL) {
-        thread_tree_.AddKernelMap(r.data.addr, r.data.len, r.data.pgoff, r.sample_id.time_data.time,
-                                  r.filename);
-      } else {
-        thread_tree_.AddThreadMap(r.data.pid, r.data.tid, r.data.addr, r.data.len, r.data.pgoff,
-                                  r.sample_id.time_data.time, r.filename);
-      }
-    } else if (record->header.type == PERF_RECORD_MMAP2) {
-      const Mmap2Record& r = *static_cast<const Mmap2Record*>(record.get());
-      if ((r.header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_KERNEL) {
-        thread_tree_.AddKernelMap(r.data.addr, r.data.len, r.data.pgoff, r.sample_id.time_data.time,
-                                  r.filename);
-      } else {
-        std::string filename =
-            (r.filename == DEFAULT_EXECNAME_FOR_THREAD_MMAP) ? "[unknown]" : r.filename;
-        thread_tree_.AddThreadMap(r.data.pid, r.data.tid, r.data.addr, r.data.len, r.data.pgoff,
-                                  r.sample_id.time_data.time, filename);
-      }
-    } else if (record->header.type == PERF_RECORD_SAMPLE) {
+    if (record->header.type == PERF_RECORD_SAMPLE) {
       ProcessSampleRecord(*static_cast<const SampleRecord*>(record.get()));
-    } else if (record->header.type == PERF_RECORD_COMM) {
-      const CommRecord& r = *static_cast<const CommRecord*>(record.get());
-      thread_tree_.AddThread(r.data.pid, r.data.tid, r.comm);
-    } else if (record->header.type == PERF_RECORD_FORK) {
-      const ForkRecord& r = *static_cast<const ForkRecord*>(record.get());
-      thread_tree_.ForkThread(r.data.pid, r.data.tid, r.data.ppid, r.data.ptid);
     }
   }
 }
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index 8b7f09b..90e0977 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -17,6 +17,7 @@
 #include "record.h"
 
 #include <inttypes.h>
+#include <algorithm>
 #include <unordered_map>
 
 #include <base/logging.h>
@@ -435,26 +436,65 @@
   return buf;
 }
 
-std::unique_ptr<const Record> ReadRecordFromBuffer(const perf_event_attr& attr,
-                                                   const perf_event_header* pheader) {
+static std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr,
+                                                    const perf_event_header* pheader) {
   switch (pheader->type) {
     case PERF_RECORD_MMAP:
-      return std::unique_ptr<const Record>(new MmapRecord(attr, pheader));
+      return std::unique_ptr<Record>(new MmapRecord(attr, pheader));
     case PERF_RECORD_MMAP2:
-      return std::unique_ptr<const Record>(new Mmap2Record(attr, pheader));
+      return std::unique_ptr<Record>(new Mmap2Record(attr, pheader));
     case PERF_RECORD_COMM:
-      return std::unique_ptr<const Record>(new CommRecord(attr, pheader));
+      return std::unique_ptr<Record>(new CommRecord(attr, pheader));
     case PERF_RECORD_EXIT:
-      return std::unique_ptr<const Record>(new ExitRecord(attr, pheader));
+      return std::unique_ptr<Record>(new ExitRecord(attr, pheader));
     case PERF_RECORD_FORK:
-      return std::unique_ptr<const Record>(new ForkRecord(attr, pheader));
+      return std::unique_ptr<Record>(new ForkRecord(attr, pheader));
     case PERF_RECORD_SAMPLE:
-      return std::unique_ptr<const Record>(new SampleRecord(attr, pheader));
+      return std::unique_ptr<Record>(new SampleRecord(attr, pheader));
     default:
-      return std::unique_ptr<const Record>(new Record(pheader));
+      return std::unique_ptr<Record>(new Record(pheader));
   }
 }
 
+// Comparator for std::sort: returns true when r1 has to be placed before r2, i.e. when r1 is
+// older, or when both have the same time and only r2 is a sample record.
+static bool IsRecordHappensBefore(const std::unique_ptr<Record>& r1,
+                                  const std::unique_ptr<Record>& r2) {
+  // Sample records keep their time in time_data, all other records in sample_id.
+  auto timestamp_of = [](const Record& record, bool is_sample) {
+    return is_sample ? static_cast<const SampleRecord&>(record).time_data.time
+                     : record.sample_id.time_data.time;
+  };
+  const bool r1_is_sample = (r1->header.type == PERF_RECORD_SAMPLE);
+  const bool r2_is_sample = (r2->header.type == PERF_RECORD_SAMPLE);
+  const uint64_t r1_time = timestamp_of(*r1, r1_is_sample);
+  const uint64_t r2_time = timestamp_of(*r2, r2_is_sample);
+  if (r1_time != r2_time) {
+    return r1_time < r2_time;  // The earlier record comes first.
+  }
+  // On a tie, a non-sample record precedes a sample record, because it may hold information
+  // needed to parse the sample. Records of the same kind are left unordered.
+  return !r1_is_sample && r2_is_sample;
+}
+
+// Reads all records in buf; sorts them by time if attr has PERF_SAMPLE_TIME and sample_id_all.
+std::vector<std::unique_ptr<Record>> ReadRecordsFromBuffer(const perf_event_attr& attr,
+                                                           const char* buf, size_t buf_size) {
+  std::vector<std::unique_ptr<Record>> result;
+  for (size_t offset = 0; buf_size - offset >= sizeof(perf_event_header);) {
+    auto header = reinterpret_cast<const perf_event_header*>(buf + offset);
+    if (header->size < sizeof(perf_event_header) || header->size > buf_size - offset) {
+      break;  // Truncated or malformed (e.g. zero-sized) record: stop rather than loop forever.
+    }
+    result.push_back(ReadRecordFromBuffer(attr, header));
+    offset += header->size;
+  }
+  if ((attr.sample_type & PERF_SAMPLE_TIME) && attr.sample_id_all) {
+    std::sort(result.begin(), result.end(), IsRecordHappensBefore);
+  }
+  return result;
+}
+
 MmapRecord CreateMmapRecord(const perf_event_attr& attr, bool in_kernel, uint32_t pid, uint32_t tid,
                             uint64_t addr, uint64_t len, uint64_t pgoff,
                             const std::string& filename) {
diff --git a/simpleperf/record.h b/simpleperf/record.h
index b982675..c63a38e 100644
--- a/simpleperf/record.h
+++ b/simpleperf/record.h
@@ -268,8 +268,8 @@
   void DumpData(size_t indent) const override;
 };
 
-std::unique_ptr<const Record> ReadRecordFromBuffer(const perf_event_attr& attr,
-                                                   const perf_event_header* pheader);
+std::vector<std::unique_ptr<Record>> ReadRecordsFromBuffer(const perf_event_attr& attr,
+                                                           const char* buf, size_t buf_size);
 MmapRecord CreateMmapRecord(const perf_event_attr& attr, bool in_kernel, uint32_t pid, uint32_t tid,
                             uint64_t addr, uint64_t len, uint64_t pgoff,
                             const std::string& filename);
@@ -279,4 +279,5 @@
                             uint32_t ptid);
 BuildIdRecord CreateBuildIdRecord(bool in_kernel, pid_t pid, const BuildId& build_id,
                                   const std::string& filename);
+
 #endif  // SIMPLE_PERF_RECORD_H_
diff --git a/simpleperf/record_file.h b/simpleperf/record_file.h
index 07912fd..d94e083 100644
--- a/simpleperf/record_file.h
+++ b/simpleperf/record_file.h
@@ -46,10 +46,8 @@
     return WriteData(data.data(), data.size());
   }
 
-  // Use MmapRecords and SampleRecords in record file to conclude which modules/files were executing
-  // at sample times.
-  bool GetHitModules(std::vector<std::string>* hit_kernel_modules,
-                     std::vector<std::string>* hit_user_files);
+  // Read data section that has been written, for further processing.
+  bool ReadDataSection(std::vector<std::unique_ptr<Record>>* records);
 
   bool WriteFeatureHeader(size_t feature_count);
   bool WriteBuildIdFeature(const std::vector<BuildIdRecord>& build_id_records);
@@ -101,7 +99,7 @@
   const PerfFileFormat::FileHeader* FileHeader();
   std::vector<const PerfFileFormat::FileAttr*> AttrSection();
   std::vector<uint64_t> IdsForAttr(const PerfFileFormat::FileAttr* attr);
-  std::vector<std::unique_ptr<const Record>> DataSection();
+  std::vector<std::unique_ptr<Record>> DataSection();
   const std::map<int, PerfFileFormat::SectionDesc>& FeatureSectionDescriptors();
   const char* DataAtOffset(uint64_t offset) {
     return mmap_addr_ + offset;
diff --git a/simpleperf/record_file_reader.cpp b/simpleperf/record_file_reader.cpp
index 5336b80..8e61aa0 100644
--- a/simpleperf/record_file_reader.cpp
+++ b/simpleperf/record_file_reader.cpp
@@ -110,47 +110,12 @@
   return result;
 }
 
-static bool IsRecordHappensBefore(const std::unique_ptr<const Record>& r1,
-                                  const std::unique_ptr<const Record>& r2) {
-  bool is_r1_sample = (r1->header.type == PERF_RECORD_SAMPLE);
-  bool is_r2_sample = (r2->header.type == PERF_RECORD_SAMPLE);
-  uint64_t time1 = (is_r1_sample ? static_cast<const SampleRecord*>(r1.get())->time_data.time
-                                 : r1->sample_id.time_data.time);
-  uint64_t time2 = (is_r2_sample ? static_cast<const SampleRecord*>(r2.get())->time_data.time
-                                 : r2->sample_id.time_data.time);
-  // The record with smaller time happens first.
-  if (time1 != time2) {
-    return time1 < time2;
-  }
-  // If happening at the same time, make non-sample records before sample records,
-  // because non-sample records may contain useful information to parse sample records.
-  if (is_r1_sample != is_r2_sample) {
-    return is_r1_sample ? false : true;
-  }
-  // Otherwise, don't care of the order.
-  return false;
-}
-
-std::vector<std::unique_ptr<const Record>> RecordFileReader::DataSection() {
-  std::vector<std::unique_ptr<const Record>> result;
+std::vector<std::unique_ptr<Record>> RecordFileReader::DataSection() {
   const struct FileHeader* header = FileHeader();
   auto file_attrs = AttrSection();
   CHECK(file_attrs.size() > 0);
-  perf_event_attr attr = file_attrs[0]->attr;
-
-  const char* end = mmap_addr_ + header->data.offset + header->data.size;
-  const char* p = mmap_addr_ + header->data.offset;
-  while (p < end) {
-    const perf_event_header* header = reinterpret_cast<const perf_event_header*>(p);
-    if (p + header->size <= end) {
-      result.push_back(ReadRecordFromBuffer(attr, header));
-    }
-    p += header->size;
-  }
-  if ((attr.sample_type & PERF_SAMPLE_TIME) && attr.sample_id_all) {
-    std::sort(result.begin(), result.end(), IsRecordHappensBefore);
-  }
-  return result;
+  return ReadRecordsFromBuffer(file_attrs[0]->attr, mmap_addr_ + header->data.offset,
+                               header->data.size);
 }
 
 const std::map<int, SectionDesc>& RecordFileReader::FeatureSectionDescriptors() {
diff --git a/simpleperf/record_file_test.cpp b/simpleperf/record_file_test.cpp
index 35a66d6..3cefb83 100644
--- a/simpleperf/record_file_test.cpp
+++ b/simpleperf/record_file_test.cpp
@@ -58,6 +58,12 @@
       CreateMmapRecord(event_attr, true, 1, 1, 0x1000, 0x2000, 0x3000, "mmap_record_example");
   ASSERT_TRUE(writer->WriteData(mmap_record.BinaryFormat()));
 
+  // Check data section that has been written.
+  std::vector<std::unique_ptr<Record>> records;
+  ASSERT_TRUE(writer->ReadDataSection(&records));
+  ASSERT_EQ(1u, records.size());
+  CheckRecordEqual(mmap_record, *records[0]);
+
   // Write feature section.
   ASSERT_TRUE(writer->WriteFeatureHeader(1));
   char p[BuildId::Size()];
@@ -81,7 +87,7 @@
   ASSERT_EQ(1u, ids.size());
 
   // Read and check data section.
-  std::vector<std::unique_ptr<const Record>> records = reader->DataSection();
+  records = reader->DataSection();
   ASSERT_EQ(1u, records.size());
   CheckRecordEqual(mmap_record, *records[0]);
 
@@ -120,7 +126,7 @@
   // Read from a record file.
   std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(filename);
   ASSERT_TRUE(reader != nullptr);
-  std::vector<std::unique_ptr<const Record>> records = reader->DataSection();
+  std::vector<std::unique_ptr<Record>> records = reader->DataSection();
   ASSERT_EQ(3u, records.size());
   CheckRecordEqual(r2, *records[0]);
   CheckRecordEqual(r1, *records[1]);
diff --git a/simpleperf/record_file_writer.cpp b/simpleperf/record_file_writer.cpp
index e2358d3..79bb7cf 100644
--- a/simpleperf/record_file_writer.cpp
+++ b/simpleperf/record_file_writer.cpp
@@ -133,64 +133,7 @@
   return true;
 }
 
-void RecordFileWriter::GetHitModulesInBuffer(const char* p, const char* end,
-                                             std::vector<std::string>* hit_kernel_modules,
-                                             std::vector<std::string>* hit_user_files) {
-  std::vector<std::unique_ptr<const Record>> kernel_mmaps;
-  std::vector<std::unique_ptr<const Record>> user_mmaps;
-  std::set<std::string> hit_kernel_set;
-  std::set<std::string> hit_user_set;
-
-  while (p < end) {
-    auto header = reinterpret_cast<const perf_event_header*>(p);
-    CHECK_LE(p + header->size, end);
-    p += header->size;
-    std::unique_ptr<const Record> record = ReadRecordFromBuffer(event_attr_, header);
-    CHECK(record != nullptr);
-    if (record->header.type == PERF_RECORD_MMAP) {
-      if (record->header.misc & PERF_RECORD_MISC_KERNEL) {
-        kernel_mmaps.push_back(std::move(record));
-      } else {
-        user_mmaps.push_back(std::move(record));
-      }
-    } else if (record->header.type == PERF_RECORD_SAMPLE) {
-      auto& r = *static_cast<const SampleRecord*>(record.get());
-      if (!(r.sample_type & PERF_SAMPLE_IP) || !(r.sample_type & PERF_SAMPLE_TID)) {
-        continue;
-      }
-      uint32_t pid = r.tid_data.pid;
-      uint64_t ip = r.ip_data.ip;
-      if (r.header.misc & PERF_RECORD_MISC_KERNEL) {
-        // Loop from back to front, because new MmapRecords are inserted at the end of the mmaps,
-        // and we want to match the newest one.
-        for (auto it = kernel_mmaps.rbegin(); it != kernel_mmaps.rend(); ++it) {
-          auto& m_record = *reinterpret_cast<const MmapRecord*>(it->get());
-          if (ip >= m_record.data.addr && ip < m_record.data.addr + m_record.data.len) {
-            hit_kernel_set.insert(m_record.filename);
-            break;
-          }
-        }
-      } else {
-        for (auto it = user_mmaps.rbegin(); it != user_mmaps.rend(); ++it) {
-          auto& m_record = *reinterpret_cast<const MmapRecord*>(it->get());
-          if (pid == m_record.data.pid && ip >= m_record.data.addr &&
-              ip < m_record.data.addr + m_record.data.len) {
-            hit_user_set.insert(m_record.filename);
-            break;
-          }
-        }
-      }
-    }
-  }
-  hit_kernel_modules->clear();
-  hit_kernel_modules->insert(hit_kernel_modules->begin(), hit_kernel_set.begin(),
-                             hit_kernel_set.end());
-  hit_user_files->clear();
-  hit_user_files->insert(hit_user_files->begin(), hit_user_set.begin(), hit_user_set.end());
-}
-
-bool RecordFileWriter::GetHitModules(std::vector<std::string>* hit_kernel_modules,
-                                     std::vector<std::string>* hit_user_files) {
+bool RecordFileWriter::ReadDataSection(std::vector<std::unique_ptr<Record>>* records) {
   if (fflush(record_fp_) != 0) {
     PLOG(ERROR) << "fflush() failed";
     return false;
@@ -205,14 +148,14 @@
     PLOG(ERROR) << "mmap() failed";
     return false;
   }
-  const char* data_section_p = reinterpret_cast<const char*>(mmap_addr) + data_section_offset_;
-  const char* data_section_end = data_section_p + data_section_size_;
-  GetHitModulesInBuffer(data_section_p, data_section_end, hit_kernel_modules, hit_user_files);
-
+  const char* data_section = reinterpret_cast<char*>(mmap_addr) + data_section_offset_;
+  std::vector<std::unique_ptr<Record>> result =
+      ReadRecordsFromBuffer(event_attr_, data_section, data_section_size_);
   if (munmap(mmap_addr, mmap_len) == -1) {
     PLOG(ERROR) << "munmap() failed";
     return false;
   }
+  *records = std::move(result);
   return true;
 }
 
diff --git a/simpleperf/record_test.cpp b/simpleperf/record_test.cpp
index 96262a8..27edc52 100644
--- a/simpleperf/record_test.cpp
+++ b/simpleperf/record_test.cpp
@@ -38,10 +38,10 @@
 template <class RecordType>
 void RecordTest::CheckRecordMatchBinary(const RecordType& record) {
   std::vector<char> binary = record.BinaryFormat();
-  std::unique_ptr<const Record> record_p =
-      ReadRecordFromBuffer(event_attr, reinterpret_cast<const perf_event_header*>(binary.data()));
-  ASSERT_TRUE(record_p != nullptr);
-  CheckRecordEqual(record, *record_p);
+  std::vector<std::unique_ptr<Record>> records =
+      ReadRecordsFromBuffer(event_attr, binary.data(), binary.size());
+  ASSERT_EQ(1u, records.size());
+  CheckRecordEqual(record, *records[0]);
 }
 
 TEST_F(RecordTest, MmapRecordMatchBinary) {
diff --git a/simpleperf/thread_tree.cpp b/simpleperf/thread_tree.cpp
index ef51b5c..a442686 100644
--- a/simpleperf/thread_tree.cpp
+++ b/simpleperf/thread_tree.cpp
@@ -18,6 +18,8 @@
 
 #include <base/logging.h>
 #include "environment.h"
+#include "perf_event.h"
+#include "record.h"
 
 bool MapComparator::operator()(const MapEntry* map1, const MapEntry* map2) const {
   if (map1->start_addr != map2->start_addr) {
@@ -175,3 +177,35 @@
   }
   return symbol;
 }
+
+// Feeds the mmap, mmap2, comm and fork records in |records| to |thread_tree|, in vector order.
+void BuildThreadTree(const std::vector<std::unique_ptr<Record>>& records, ThreadTree* thread_tree) {
+  for (const auto& entry : records) {
+    if (entry->header.type == PERF_RECORD_COMM) {
+      const auto& comm = *static_cast<const CommRecord*>(entry.get());
+      thread_tree->AddThread(comm.data.pid, comm.data.tid, comm.comm);
+    } else if (entry->header.type == PERF_RECORD_FORK) {
+      const auto& d = static_cast<const ForkRecord*>(entry.get())->data;
+      thread_tree->ForkThread(d.pid, d.tid, d.ppid, d.ptid);
+    } else if (entry->header.type == PERF_RECORD_MMAP) {
+      const auto& m = *static_cast<const MmapRecord*>(entry.get());
+      const auto& d = m.data;
+      const auto time = m.sample_id.time_data.time;
+      if ((m.header.misc & PERF_RECORD_MISC_CPUMODE_MASK) != PERF_RECORD_MISC_KERNEL) {
+        thread_tree->AddThreadMap(d.pid, d.tid, d.addr, d.len, d.pgoff, time, m.filename);
+      } else {
+        thread_tree->AddKernelMap(d.addr, d.len, d.pgoff, time, m.filename);
+      }
+    } else if (entry->header.type == PERF_RECORD_MMAP2) {
+      const auto& m = *static_cast<const Mmap2Record*>(entry.get());
+      const auto& d = m.data;
+      const auto time = m.sample_id.time_data.time;
+      if ((m.header.misc & PERF_RECORD_MISC_CPUMODE_MASK) != PERF_RECORD_MISC_KERNEL) {
+        auto filename = m.filename == DEFAULT_EXECNAME_FOR_THREAD_MMAP ? "[unknown]" : m.filename;
+        thread_tree->AddThreadMap(d.pid, d.tid, d.addr, d.len, d.pgoff, time, filename);
+      } else {
+        thread_tree->AddKernelMap(d.addr, d.len, d.pgoff, time, m.filename);
+      }
+    }
+  }
+}
diff --git a/simpleperf/thread_tree.h b/simpleperf/thread_tree.h
index 8e96e36..9388c8d 100644
--- a/simpleperf/thread_tree.h
+++ b/simpleperf/thread_tree.h
@@ -89,4 +89,8 @@
   SymbolEntry unknown_symbol_;
 };
 
+struct Record;
+
+void BuildThreadTree(const std::vector<std::unique_ptr<Record>>& records, ThreadTree* thread_tree);
+
 #endif  // SIMPLE_PERF_THREAD_TREE_H_