Merge "simpleperf: add --exclude-perf option in record cmd."
diff --git a/simpleperf/RecordReadThread.cpp b/simpleperf/RecordReadThread.cpp
index b89066b..dfcc1a9 100644
--- a/simpleperf/RecordReadThread.cpp
+++ b/simpleperf/RecordReadThread.cpp
@@ -110,8 +110,12 @@
     : sample_type_(attr.sample_type),
       sample_regs_count_(__builtin_popcountll(attr.sample_regs_user)) {
   size_t pos = sizeof(perf_event_header);
-  uint64_t mask = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+  uint64_t mask = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP;
   pos += __builtin_popcountll(sample_type_ & mask) * sizeof(uint64_t);
+  if (sample_type_ & PERF_SAMPLE_TID) {
+    pid_pos_in_sample_records_ = pos;
+    pos += sizeof(uint64_t);
+  }
   if (sample_type_ & PERF_SAMPLE_TIME) {
     time_pos_in_sample_records_ = pos;
     pos += sizeof(uint64_t);
@@ -207,7 +211,8 @@
 
 RecordReadThread::RecordReadThread(size_t record_buffer_size, const perf_event_attr& attr,
                                    size_t min_mmap_pages, size_t max_mmap_pages,
-                                   size_t aux_buffer_size, bool allow_cutting_samples)
+                                   size_t aux_buffer_size, bool allow_cutting_samples,
+                                   bool exclude_perf)
     : record_buffer_(record_buffer_size),
       record_parser_(attr),
       attr_(attr),
@@ -222,6 +227,9 @@
   if (!allow_cutting_samples) {
     record_buffer_low_level_ = record_buffer_critical_level_;
   }
+  if (exclude_perf) {
+    exclude_pid_ = getpid();
+  }
 }
 
 RecordReadThread::~RecordReadThread() {
@@ -488,6 +496,13 @@
 
 void RecordReadThread::PushRecordToRecordBuffer(KernelRecordReader* kernel_record_reader) {
   const perf_event_header& header = kernel_record_reader->RecordHeader();
+  if (header.type == PERF_RECORD_SAMPLE && exclude_pid_ != -1) {
+    uint32_t pid;
+    kernel_record_reader->ReadRecord(record_parser_.GetPidPosInSampleRecord(), sizeof(pid), &pid);
+    if (pid == exclude_pid_) {
+      return;
+    }
+  }
   if (header.type == PERF_RECORD_SAMPLE && stack_size_in_sample_record_ > 1024) {
     size_t free_size = record_buffer_.GetFreeSize();
     if (free_size < record_buffer_critical_level_) {
diff --git a/simpleperf/RecordReadThread.h b/simpleperf/RecordReadThread.h
index 4e93f97..e991fff 100644
--- a/simpleperf/RecordReadThread.h
+++ b/simpleperf/RecordReadThread.h
@@ -70,6 +70,8 @@
  public:
   RecordParser(const perf_event_attr& attr);
 
+  // Return pos of the pid field in sample records, or 0 if sample_type lacks PERF_SAMPLE_TID.
+  size_t GetPidPosInSampleRecord() const { return pid_pos_in_sample_records_; }
   // Return pos of the time field in the record. If not available, return 0.
   size_t GetTimePos(const perf_event_header& header) const;
   // Return pos of the user stack size field in the sample record. If not available, return 0.
@@ -78,6 +80,7 @@
  private:
   uint64_t sample_type_;
   uint64_t sample_regs_count_;
+  size_t pid_pos_in_sample_records_ = 0;
   size_t time_pos_in_sample_records_ = 0;
   size_t time_rpos_in_non_sample_records_ = 0;
   size_t callchain_pos_in_sample_records_ = 0;
@@ -125,7 +128,7 @@
  public:
   RecordReadThread(size_t record_buffer_size, const perf_event_attr& attr, size_t min_mmap_pages,
                    size_t max_mmap_pages, size_t aux_buffer_size,
-                   bool allow_cutting_samples = true);
+                   bool allow_cutting_samples = true, bool exclude_perf = false);
   ~RecordReadThread();
   void SetBufferLevels(size_t record_buffer_low_level, size_t record_buffer_critical_level) {
     record_buffer_low_level_ = record_buffer_low_level;
@@ -203,6 +206,7 @@
 
   std::unique_ptr<std::thread> read_thread_;
   std::vector<KernelRecordReader> kernel_record_readers_;
+  pid_t exclude_pid_ = -1;
 
   RecordStat stat_;
 };
diff --git a/simpleperf/RecordReadThread_test.cpp b/simpleperf/RecordReadThread_test.cpp
index eae5834..8d7f6ac 100644
--- a/simpleperf/RecordReadThread_test.cpp
+++ b/simpleperf/RecordReadThread_test.cpp
@@ -22,6 +22,7 @@
 #include "event_type.h"
 #include "get_test_data.h"
 #include "record.h"
+#include "record_equal_test.h"
 #include "record_file.h"
 
 using ::testing::_;
@@ -91,11 +92,18 @@
       read_record_fn(pos, sizeof(time), &time);
       ASSERT_EQ(record->Timestamp(), time);
       if (record->type() == PERF_RECORD_SAMPLE) {
+        auto sr = static_cast<SampleRecord*>(record.get());
         pos = parser.GetStackSizePos(read_record_fn);
         ASSERT_NE(0u, pos);
         uint64_t stack_size;
         read_record_fn(pos, sizeof(stack_size), &stack_size);
-        ASSERT_EQ(static_cast<SampleRecord*>(record.get())->stack_user_data.size, stack_size);
+        ASSERT_EQ(sr->stack_user_data.size, stack_size);
+
+        // Test pid pos in sample records.
+        pos = parser.GetPidPosInSampleRecord();
+        uint32_t pid;
+        read_record_fn(pos, sizeof(pid), &pid);
+        ASSERT_EQ(sr->tid_data.pid, pid);
       }
     }
   };
@@ -395,6 +403,49 @@
   ASSERT_EQ(thread.GetStat().cut_stack_samples, 0u);
 }
 
+// Check that exclude_perf makes the read thread drop samples whose pid is getpid(), and that
+// nothing is dropped when the option is left at its default.
+TEST_F(RecordReadThreadTest, exclude_perf) {
+  perf_event_attr attr = CreateFakeEventAttr();
+  attr.sample_type |= PERF_SAMPLE_STACK_USER;
+  size_t stack_size = 1024;
+  attr.sample_stack_user = stack_size;
+  // One sample attributed to this process, and one attributed to a different pid.
+  records_.emplace_back(new SampleRecord(attr, 0, 1, getpid(), 3, 4, 5, 6, {},
+                                         std::vector<char>(stack_size), stack_size));
+  records_.emplace_back(new SampleRecord(attr, 0, 1, getpid() + 1, 3, 4, 5, 6, {},
+                                         std::vector<char>(stack_size), stack_size));
+
+  // Run one add/sync/remove cycle on fake event fds and collect every record the thread returns.
+  auto read_records = [&](RecordReadThread& thread, std::vector<std::unique_ptr<Record>>& records) {
+    records.clear();
+    std::vector<EventFd*> event_fds = CreateFakeEventFds(attr, 1);
+    ASSERT_TRUE(thread.AddEventFds(event_fds));
+    ASSERT_TRUE(thread.SyncKernelBuffer());
+    ASSERT_TRUE(thread.RemoveEventFds(event_fds));
+    while (auto r = thread.GetRecord()) {
+      records.emplace_back(std::move(r));
+    }
+  };
+
+  // By default, no samples are excluded.
+  RecordReadThread thread(128 * 1024, attr, 1, 1, 0);
+  IOEventLoop loop;
+  ASSERT_TRUE(thread.RegisterDataCallback(loop, []() { return true; }));
+  std::vector<std::unique_ptr<Record>> received_records;
+  read_records(thread, received_records);
+  ASSERT_EQ(received_records.size(), 2u);
+  CheckRecordEqual(*received_records[0], *records_[0]);
+  CheckRecordEqual(*received_records[1], *records_[1]);
+
+  // With exclude_perf, the first sample is excluded.
+  RecordReadThread thread2(128 * 1024, attr, 1, 1, 0, true, true);
+  ASSERT_TRUE(thread2.RegisterDataCallback(loop, []() { return true; }));
+  read_records(thread2, received_records);
+  ASSERT_EQ(received_records.size(), 1u);
+  CheckRecordEqual(*received_records[0], *records_[1]);
+}
+
 struct FakeAuxData {
   std::vector<char> buf1;
   std::vector<char> buf2;
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index a9e6869..e9d8317 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -118,7 +118,8 @@
 "       can be used to change target of sampling information.\n"
 "       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
 "Select monitored threads:\n"
-"-a     System-wide collection.\n"
+"-a     System-wide collection. Use with --exclude-perf to exclude samples for\n"
+"       simpleperf process.\n"
 #if defined(__ANDROID__)
 "--app package_name    Profile the process of an Android application.\n"
 "                      On non-rooted devices, the app must be debuggable,\n"
@@ -127,6 +128,7 @@
 "-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
 "                       with -a.\n"
 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
+"--exclude-perf   Exclude samples for simpleperf process.\n"
 "\n"
 "Select monitored event types:\n"
 "-e event1[:modifier1],event2[:modifier2],...\n"
@@ -367,6 +369,7 @@
   EventAttrWithId dumping_attr_id_;
   // In system wide recording, record if we have dumped map info for a process.
   std::unordered_set<pid_t> dumped_processes_;
+  bool exclude_perf_ = false;
 };
 
 bool RecordCommand::Run(const std::vector<std::string>& args) {
@@ -486,7 +489,7 @@
                                                       : kRecordBufferSize;
   if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
                                            aux_buffer_size_, record_buffer_size,
-                                           allow_cutting_samples_)) {
+                                           allow_cutting_samples_, exclude_perf_)) {
     return false;
   }
   auto callback =
@@ -820,6 +823,8 @@
           wait_setting_speed_event_groups_.push_back(group_id);
         }
       }
+    } else if (args[i] == "--exclude-perf") {
+      exclude_perf_ = true;
     } else if (args[i] == "--exit-with-parent") {
       prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
     } else if (args[i] == "-g") {
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 207ec64..f4ed1d8 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -1019,3 +1019,27 @@
   }
   TEST_IN_ROOT(ASSERT_TRUE(RunRecordCmd({"-e", event_string})));
 }
+
+// Check that --exclude-perf is accepted, and that a system wide recording made with it
+// contains no sample whose pid is the pid of this process.
+TEST(record_cmd, exclude_perf_option) {
+  ASSERT_TRUE(RunRecordCmd({"--exclude-perf"}));
+  if (IsRoot()) {
+    TemporaryFile tmpfile;
+    ASSERT_TRUE(RecordCmd()->Run(
+        {"-a", "--exclude-perf", "--duration", "1", "-e", GetDefaultEvent(), "-o", tmpfile.path}));
+    std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile.path);
+    ASSERT_TRUE(reader);
+    pid_t perf_pid = getpid();
+    // The callback returns false on the first sample from perf_pid; the assertion expects
+    // ReadDataSection() to report that as a failure.
+    ASSERT_TRUE(reader->ReadDataSection([&](std::unique_ptr<Record> r) {
+      if (r->type() == PERF_RECORD_SAMPLE) {
+        if (static_cast<SampleRecord*>(r.get())->tid_data.pid == perf_pid) {
+          return false;
+        }
+      }
+      return true;
+    }));
+  }
+}
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index d26127f..6583bd2 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -630,10 +630,11 @@
 
 bool EventSelectionSet::MmapEventFiles(size_t min_mmap_pages, size_t max_mmap_pages,
                                        size_t aux_buffer_size, size_t record_buffer_size,
-                                       bool allow_cutting_samples) {
+                                       bool allow_cutting_samples, bool exclude_perf) {
   record_read_thread_.reset(
       new simpleperf::RecordReadThread(record_buffer_size, groups_[0][0].event_attr, min_mmap_pages,
-                                       max_mmap_pages, aux_buffer_size, allow_cutting_samples));
+                                       max_mmap_pages, aux_buffer_size, allow_cutting_samples,
+                                       exclude_perf));
   return true;
 }
 
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index 3afc76d..2af1471 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -143,7 +143,7 @@
   bool OpenEventFiles(const std::vector<int>& cpus);
   bool ReadCounters(std::vector<CountersInfo>* counters);
   bool MmapEventFiles(size_t min_mmap_pages, size_t max_mmap_pages, size_t aux_buffer_size,
-                      size_t record_buffer_size, bool allow_cutting_samples);
+                      size_t record_buffer_size, bool allow_cutting_samples, bool exclude_perf);
   bool PrepareToReadMmapEventData(const std::function<bool(Record*)>& callback);
   bool SyncKernelBuffer();
   bool FinishReadMmapEventData();