simpleperf: record cmd: Add --delay option

When the --delay option is used, the record command waits for
the specified period of time before recording samples.

Also, make IOEventLoop use a precise timer by default. This way,
we don't need to set it in places that use IOEventLoop.

Bug: 296492988
Test: run simpleperf_unit_test
Change-Id: I2c66aa886aa99e671eafb41b28fc2c5fd700edf7
diff --git a/simpleperf/IOEventLoop.cpp b/simpleperf/IOEventLoop.cpp
index 239fff9..d961332 100644
--- a/simpleperf/IOEventLoop.cpp
+++ b/simpleperf/IOEventLoop.cpp
@@ -40,8 +40,7 @@
   }
 };
 
-IOEventLoop::IOEventLoop()
-    : ebase_(nullptr), has_error_(false), use_precise_timer_(false), in_loop_(false) {}
+IOEventLoop::IOEventLoop() : ebase_(nullptr), has_error_(false), in_loop_(false) {}
 
 IOEventLoop::~IOEventLoop() {
   events_.clear();
@@ -50,21 +49,11 @@
   }
 }
 
-bool IOEventLoop::UsePreciseTimer() {
-  if (ebase_ != nullptr) {
-    return false;  // Too late to set the flag.
-  }
-  use_precise_timer_ = true;
-  return true;
-}
-
 bool IOEventLoop::EnsureInit() {
   if (ebase_ == nullptr) {
     event_config* cfg = event_config_new();
     if (cfg != nullptr) {
-      if (use_precise_timer_) {
-        event_config_set_flag(cfg, EVENT_BASE_FLAG_PRECISE_TIMER);
-      }
+      event_config_set_flag(cfg, EVENT_BASE_FLAG_PRECISE_TIMER);
       if (event_config_avoid_method(cfg, "epoll") != 0) {
         LOG(ERROR) << "event_config_avoid_method";
         return false;
@@ -150,6 +139,11 @@
   return AddEvent(-1, EV_PERSIST, &duration, callback, priority);
 }
 
+IOEventRef IOEventLoop::AddOneTimeEvent(timeval duration, const std::function<bool()>& callback,
+                                        IOEventPriority priority) {
+  return AddEvent(-1, 0, &duration, callback, priority);
+}
+
 IOEventRef IOEventLoop::AddEvent(int fd_or_sig, int16_t events, timeval* timeout,
                                  const std::function<bool()>& callback, IOEventPriority priority) {
   if (!EnsureInit()) {
diff --git a/simpleperf/IOEventLoop.h b/simpleperf/IOEventLoop.h
index 1578a4d..f47fa3d 100644
--- a/simpleperf/IOEventLoop.h
+++ b/simpleperf/IOEventLoop.h
@@ -46,9 +46,6 @@
   IOEventLoop();
   ~IOEventLoop();
 
-  // Use precise timer for periodic events which want precision in ms.
-  bool UsePreciseTimer();
-
   // Register a read Event, so [callback] is called when [fd] can be read
   // without blocking. If registered successfully, return the reference
   // to control the Event, otherwise return nullptr.
@@ -74,6 +71,10 @@
   IOEventRef AddPeriodicEvent(timeval duration, const std::function<bool()>& callback,
                               IOEventPriority priority = IOEventLowPriority);
 
+  // Register a one time Event, so [callback] is called once after [duration].
+  IOEventRef AddOneTimeEvent(timeval duration, const std::function<bool()>& callback,
+                             IOEventPriority priority = IOEventLowPriority);
+
   // Run a loop polling for Events. It only exits when ExitLoop() is called
   // in a callback function of registered Events.
   bool RunLoop();
diff --git a/simpleperf/IOEventLoop_test.cpp b/simpleperf/IOEventLoop_test.cpp
index 658fe82..74a4528 100644
--- a/simpleperf/IOEventLoop_test.cpp
+++ b/simpleperf/IOEventLoop_test.cpp
@@ -123,15 +123,12 @@
   ASSERT_EQ(100, count);
 }
 
-void TestPeriodicEvents(int period_in_us, int iterations, bool precise) {
+void TestPeriodicEvents(int period_in_us, int iterations) {
   timeval tv;
   tv.tv_sec = period_in_us / 1000000;
   tv.tv_usec = period_in_us % 1000000;
   int count = 0;
   IOEventLoop loop;
-  if (precise) {
-    ASSERT_TRUE(loop.UsePreciseTimer());
-  }
   ASSERT_TRUE(loop.AddPeriodicEvent(tv, [&]() {
     if (++count == iterations) {
       loop.ExitLoop();
@@ -145,17 +142,41 @@
   double time_used =
       std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time).count();
   double min_time_in_sec = period_in_us / 1e6 * iterations;
-  double max_time_in_sec = min_time_in_sec + (precise ? 0.3 : 1);
+  double max_time_in_sec = min_time_in_sec + 0.3;
   ASSERT_GE(time_used, min_time_in_sec);
   ASSERT_LT(time_used, max_time_in_sec);
 }
 
 TEST(IOEventLoop, periodic) {
-  TestPeriodicEvents(1000000, 1, false);
+  TestPeriodicEvents(1000, 100);
 }
 
-TEST(IOEventLoop, periodic_precise) {
-  TestPeriodicEvents(1000, 100, true);
+TEST(IOEventLoop, one_time_event) {
+  int duration_in_us = 1000;
+  timeval tv = {};
+  tv.tv_usec = duration_in_us;
+  int count = 0;
+  auto callback_time = std::chrono::steady_clock::now();
+  IOEventLoop loop;
+  // Add a one time event to test callback count and time.
+  ASSERT_TRUE(loop.AddOneTimeEvent(tv, [&]() {
+    ++count;
+    callback_time = std::chrono::steady_clock::now();
+    return true;
+  }));
+  // Add another one time event to exit loop.
+  tv.tv_usec = duration_in_us * 3;
+  ASSERT_TRUE(loop.AddOneTimeEvent(tv, [&]() { return loop.ExitLoop(); }));
+
+  auto start_time = std::chrono::steady_clock::now();
+  ASSERT_TRUE(loop.RunLoop());
+  ASSERT_EQ(1, count);
+  double time_used =
+      std::chrono::duration_cast<std::chrono::duration<double>>(callback_time - start_time).count();
+  double min_time_in_sec = duration_in_us / 1e6;
+  double max_time_in_sec = min_time_in_sec + 0.3;
+  ASSERT_GE(time_used, min_time_in_sec);
+  ASSERT_LT(time_used, max_time_in_sec);
 }
 
 TEST(IOEventLoop, read_and_del_event) {
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index 97f96ac..ad1133c 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -218,6 +218,7 @@
 "--cpu cpu_item1,cpu_item2,...  Monitor events on selected cpus. cpu_item can be a number like\n"
 "                               1, or a range like 0-3. A --cpu option affects all event types\n"
 "                               following it until meeting another --cpu option.\n"
+"--delay    time_in_ms   Wait time_in_ms milliseconds before recording samples.\n"
 "--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
 "                        [command]. Here time_in_sec may be any positive\n"
 "                        floating point number.\n"
@@ -425,6 +426,7 @@
   bool keep_failed_unwinding_debug_info_ = false;
   std::unique_ptr<OfflineUnwinder> offline_unwinder_;
   bool child_inherit_;
+  uint64_t delay_in_ms_ = 0;
   double duration_in_sec_;
   bool can_dump_kernel_symbols_;
   bool dump_symbols_;
@@ -614,7 +616,7 @@
   } else if (!event_selection_set_.HasMonitoredTarget()) {
     if (workload != nullptr) {
       event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
-      event_selection_set_.SetEnableOnExec(true);
+      event_selection_set_.SetEnableCondition(false, true);
     } else if (!app_package_name_.empty()) {
       // If app process is not created, wait for it. This allows simpleperf starts before
       // app process. In this way, we can have a better support of app start-up time profiling.
@@ -628,6 +630,10 @@
   } else {
     need_to_check_targets = true;
   }
+  if (delay_in_ms_ != 0) {
+    event_selection_set_.SetEnableCondition(false, false);
+  }
+
   // Profiling JITed/interpreted Java code is supported starting from Android P.
   // Also support profiling art interpreter on host.
   if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
@@ -695,6 +701,21 @@
     }
   }
 
+  if (delay_in_ms_ != 0) {
+    // Events are opened disabled when --delay is used; enable them once the delay expires.
+    auto delay_callback = [this]() {
+      if (!event_selection_set_.SetEnableEvents(true)) {
+        return false;
+      }
+      // Dump maps in case there are new maps created while delaying.
+      return system_wide_collection_ || DumpMaps();
+    };
+    // Divide by 1000.0: integer division would truncate sub-second delays to zero.
+    timeval delay_tv = SecondToTimeval(delay_in_ms_ / 1000.0);
+    if (!loop->AddOneTimeEvent(delay_tv, delay_callback)) {
+      return false;
+    }
+  }
   if (duration_in_sec_ != 0) {
     if (!loop->AddPeriodicEvent(
             SecondToTimeval(duration_in_sec_), [loop]() { return loop->ExitLoop(); },
@@ -1030,6 +1051,10 @@
     recorder.SetRecordCycles(true);
   }
 
+  if (!options.PullUintValue("--delay", &delay_in_ms_)) {
+    return false;
+  }
+
   if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
     return false;
   }
diff --git a/simpleperf/cmd_record_impl.h b/simpleperf/cmd_record_impl.h
index 457bb3b..f2436de 100644
--- a/simpleperf/cmd_record_impl.h
+++ b/simpleperf/cmd_record_impl.h
@@ -49,6 +49,7 @@
         {"--cpu", {OptionValueType::STRING, OptionType::ORDERED, AppRunnerType::ALLOWED}},
         {"--cpu-percent", {OptionValueType::UINT, OptionType::SINGLE, AppRunnerType::ALLOWED}},
         {"--decode-etm", {OptionValueType::NONE, OptionType::SINGLE, AppRunnerType::ALLOWED}},
+        {"--delay", {OptionValueType::UINT, OptionType::SINGLE, AppRunnerType::ALLOWED}},
         {"--record-timestamp", {OptionValueType::NONE, OptionType::SINGLE, AppRunnerType::ALLOWED}},
         {"--record-cycles", {OptionValueType::NONE, OptionType::SINGLE, AppRunnerType::ALLOWED}},
         {"--duration", {OptionValueType::DOUBLE, OptionType::SINGLE, AppRunnerType::ALLOWED}},
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index d969dc6..c1c8f40 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -1336,3 +1336,9 @@
   }));
   ASSERT_TRUE(has_comm);
 }
+
+TEST(record_cmd, delay_option) {
+  TemporaryFile tmpfile;
+  ASSERT_TRUE(RecordCmd()->Run(
+      {"-o", tmpfile.path, "-e", GetDefaultEvent(), "--delay", "100", "sleep", "1"}));
+}
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index a23fec1..19fb489 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -544,7 +544,7 @@
   } else if (!event_selection_set_.HasMonitoredTarget()) {
     if (workload != nullptr) {
       event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
-      event_selection_set_.SetEnableOnExec(true);
+      event_selection_set_.SetEnableCondition(false, true);
     } else if (!app_package_name_.empty()) {
       std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
       event_selection_set_.AddMonitoredProcesses(pids);
@@ -582,11 +582,6 @@
 
   // 4. Add signal/periodic Events.
   IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
-  if (interval_in_ms_ != 0) {
-    if (!loop->UsePreciseTimer()) {
-      return false;
-    }
-  }
   std::chrono::time_point<std::chrono::steady_clock> start_time;
   std::vector<CountersInfo> counters;
   if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index c1910ac..c75f804 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -452,36 +452,15 @@
   }
 }
 
-void EventSelectionSet::SetEnableOnExec(bool enable) {
+void EventSelectionSet::SetEnableCondition(bool enable_on_open, bool enable_on_exec) {
   for (auto& group : groups_) {
     for (auto& selection : group.selections) {
-      // If sampling is enabled on exec, then it is disabled at startup,
-      // otherwise it should be enabled at startup. Don't use
-      // ioctl(PERF_EVENT_IOC_ENABLE) to enable it after perf_event_open().
-      // Because some android kernels can't handle ioctl() well when cpu-hotplug
-      // happens. See http://b/25193162.
-      if (enable) {
-        selection.event_attr.enable_on_exec = 1;
-        selection.event_attr.disabled = 1;
-      } else {
-        selection.event_attr.enable_on_exec = 0;
-        selection.event_attr.disabled = 0;
-      }
+      selection.event_attr.disabled = !enable_on_open;
+      selection.event_attr.enable_on_exec = enable_on_exec;
     }
   }
 }
 
-bool EventSelectionSet::GetEnableOnExec() {
-  for (const auto& group : groups_) {
-    for (const auto& selection : group.selections) {
-      if (selection.event_attr.enable_on_exec == 0) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 void EventSelectionSet::SampleIdAll() {
   for (auto& group : groups_) {
     for (auto& selection : group.selections) {
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index f197dbf..e046035 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -121,8 +121,7 @@
   std::unordered_map<uint64_t, int> GetCpusById() const;
   std::map<int, size_t> GetHardwareCountersForCpus() const;
 
-  void SetEnableOnExec(bool enable);
-  bool GetEnableOnExec();
+  void SetEnableCondition(bool enable_on_open, bool enable_on_exec);
   void SampleIdAll();
   // Only set sample rate for events that haven't set sample rate.
   void SetSampleRateForNewEvents(const SampleRate& rate);