simpleperf: fix tests for dwarf callgraph unwinding.

32-bit simpleperf can't unwind a 64-bit `sleep` process in an aarch64
environment. This causes the following error in tests:
simpleperf is built in arch arm, and can't do stack unwinding for arch arm64

So make the workload able to start a process that runs a callback
function instead of exec'ing a command. Such a child never calls
execvp(), so by profiling that process we can guarantee that 32-bit
simpleperf is profiling a 32-bit process.

Also fix the flaky test IOEventLoop.signal, and build a 32-bit
simpleperf_unit_test on 64-bit devices.

Bug: http://b/33167911
Test: run simpleperf_unit_test.
Change-Id: I82741dc5d90c73c1890f834d8e2a9188421a3828
diff --git a/simpleperf/Android.mk b/simpleperf/Android.mk
index 9addd06..e993906 100644
--- a/simpleperf/Android.mk
+++ b/simpleperf/Android.mk
@@ -279,7 +279,7 @@
   $($(LOCAL_2ND_ARCH_VAR_PREFIX)TARGET_OBJCOPY) --add-section .testzipdata=$$TMP_FILE $(linked_module) && \
   rm -f $$TMP_FILE
 
-LOCAL_MULTILIB := first
+LOCAL_MULTILIB := both
 LOCAL_FORCE_STATIC_EXECUTABLE := true
 include $(LLVM_DEVICE_BUILD_MK)
 include $(BUILD_NATIVE_TEST)
diff --git a/simpleperf/IOEventLoop_test.cpp b/simpleperf/IOEventLoop_test.cpp
index 90bb4fa..dc7a4da 100644
--- a/simpleperf/IOEventLoop_test.cpp
+++ b/simpleperf/IOEventLoop_test.cpp
@@ -18,6 +18,7 @@
 
 #include <gtest/gtest.h>
 
+#include <atomic>
 #include <chrono>
 #include <thread>
 
@@ -105,13 +106,15 @@
     }
     return true;
   }));
-  std::thread thread([]() {
-    for (int i = 0; i < 100; ++i) {
+  std::atomic<bool> stop_thread(false);
+  std::thread thread([&]() {
+    while (!stop_thread) {
       usleep(1000);
       kill(getpid(), SIGINT);
     }
   });
   ASSERT_TRUE(loop.RunLoop());
+  stop_thread = true;
   thread.join();
   ASSERT_EQ(100, count);
 }
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index b863f01..35f330e 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -141,10 +141,13 @@
 
 TEST(record_cmd, dwarf_callchain_sampling) {
   if (IsDwarfCallChainSamplingSupported()) {
-    ASSERT_TRUE(RunRecordCmd({"--call-graph", "dwarf"}));
-    ASSERT_TRUE(RunRecordCmd({"--call-graph", "dwarf,16384"}));
-    ASSERT_FALSE(RunRecordCmd({"--call-graph", "dwarf,65536"}));
-    ASSERT_TRUE(RunRecordCmd({"-g"}));
+    std::vector<std::unique_ptr<Workload>> workloads;
+    CreateProcesses(1, &workloads);
+    std::string pid = std::to_string(workloads[0]->GetPid());
+    ASSERT_TRUE(RunRecordCmd({"-p", pid, "--call-graph", "dwarf"}));
+    ASSERT_TRUE(RunRecordCmd({"-p", pid, "--call-graph", "dwarf,16384"}));
+    ASSERT_FALSE(RunRecordCmd({"-p", pid, "--call-graph", "dwarf,65536"}));
+    ASSERT_TRUE(RunRecordCmd({"-p", pid, "-g"}));
   } else {
     GTEST_LOG_(INFO) << "This test does nothing as dwarf callchain sampling is "
                         "not supported on this device.";
@@ -172,7 +175,10 @@
 
 TEST(record_cmd, post_unwind_option) {
   if (IsDwarfCallChainSamplingSupported()) {
-    ASSERT_TRUE(RunRecordCmd({"--call-graph", "dwarf", "--post-unwind"}));
+    std::vector<std::unique_ptr<Workload>> workloads;
+    CreateProcesses(1, &workloads);
+    std::string pid = std::to_string(workloads[0]->GetPid());
+    ASSERT_TRUE(RunRecordCmd({"-p", pid, "--call-graph", "dwarf", "--post-unwind"}));
   } else {
     GTEST_LOG_(INFO) << "This test does nothing as dwarf callchain sampling is "
                         "not supported on this device.";
@@ -291,11 +297,14 @@
   CheckDsoSymbolRecords(tmpfile.path, true, &success);
   ASSERT_TRUE(success);
   if (IsDwarfCallChainSamplingSupported()) {
-    ASSERT_TRUE(RunRecordCmd({"-g"}, tmpfile.path));
+    std::vector<std::unique_ptr<Workload>> workloads;
+    CreateProcesses(1, &workloads);
+    std::string pid = std::to_string(workloads[0]->GetPid());
+    ASSERT_TRUE(RunRecordCmd({"-p", pid, "-g"}, tmpfile.path));
     bool success;
     CheckDsoSymbolRecords(tmpfile.path, false, &success);
     ASSERT_TRUE(success);
-    ASSERT_TRUE(RunRecordCmd({"-g", "--dump-symbols"}, tmpfile.path));
+    ASSERT_TRUE(RunRecordCmd({"-p", pid, "-g", "--dump-symbols"}, tmpfile.path));
     CheckDsoSymbolRecords(tmpfile.path, true, &success);
     ASSERT_TRUE(success);
   }
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
index 98190ee..f34be5c 100644
--- a/simpleperf/cmd_report_test.cpp
+++ b/simpleperf/cmd_report_test.cpp
@@ -446,9 +446,12 @@
 
 TEST_F(ReportCommandTest, dwarf_callgraph) {
   if (IsDwarfCallChainSamplingSupported()) {
+    std::vector<std::unique_ptr<Workload>> workloads;
+    CreateProcesses(1, &workloads);
+    std::string pid = std::to_string(workloads[0]->GetPid());
     TemporaryFile tmp_file;
     ASSERT_TRUE(
-        RecordCmd()->Run({"-g", "-o", tmp_file.path, "sleep", SLEEP_SEC}));
+        RecordCmd()->Run({"-p", pid, "-g", "-o", tmp_file.path, "sleep", SLEEP_SEC}));
     ReportRaw(tmp_file.path, {"-g"});
     ASSERT_TRUE(success);
   } else {
diff --git a/simpleperf/cmd_stat_test.cpp b/simpleperf/cmd_stat_test.cpp
index 25fcaf9..125f938 100644
--- a/simpleperf/cmd_stat_test.cpp
+++ b/simpleperf/cmd_stat_test.cpp
@@ -55,9 +55,15 @@
 void CreateProcesses(size_t count,
                      std::vector<std::unique_ptr<Workload>>* workloads) {
   workloads->clear();
+  // Create workloads that run longer than the profiling time.
+  auto function = []() {
+    while (true) {
+      for (volatile int i = 0; i < 10000; ++i);
+      usleep(1);
+    }
+  };
   for (size_t i = 0; i < count; ++i) {
-    // Create a workload runs longer than profiling time.
-    auto workload = Workload::CreateWorkload({"sleep", "1000"});
+    auto workload = Workload::CreateWorkload(function);
     ASSERT_TRUE(workload != nullptr);
     ASSERT_TRUE(workload->Start());
     workloads->push_back(std::move(workload));
diff --git a/simpleperf/workload.cpp b/simpleperf/workload.cpp
index 1d34c11..dcb0e78 100644
--- a/simpleperf/workload.cpp
+++ b/simpleperf/workload.cpp
@@ -25,7 +25,15 @@
 #include <android-base/logging.h>
 
 std::unique_ptr<Workload> Workload::CreateWorkload(const std::vector<std::string>& args) {
-  std::unique_ptr<Workload> workload(new Workload(args));
+  std::unique_ptr<Workload> workload(new Workload(args, std::function<void ()>()));
+  if (workload != nullptr && workload->CreateNewProcess()) {
+    return workload;
+  }
+  return nullptr;
+}
+
+std::unique_ptr<Workload> Workload::CreateWorkload(const std::function<void ()>& function) {
+  std::unique_ptr<Workload> workload(new Workload(std::vector<std::string>(), function));
   if (workload != nullptr && workload->CreateNewProcess()) {
     return workload;
   }
@@ -47,8 +55,6 @@
   }
 }
 
-static void ChildProcessFn(std::vector<std::string>& args, int start_signal_fd, int exec_child_fd);
-
 bool Workload::CreateNewProcess() {
   CHECK_EQ(work_state_, NotYetCreateNewProcess);
 
@@ -78,7 +84,7 @@
     // In child process.
     close(start_signal_pipe[1]);
     close(exec_child_pipe[0]);
-    ChildProcessFn(args_, start_signal_pipe[0], exec_child_pipe[1]);
+    ChildProcessFn(start_signal_pipe[0], exec_child_pipe[1]);
     _exit(0);
   }
   // In parent process.
@@ -91,28 +97,33 @@
   return true;
 }
 
-static void ChildProcessFn(std::vector<std::string>& args, int start_signal_fd, int exec_child_fd) {
+void Workload::ChildProcessFn(int start_signal_fd, int exec_child_fd) {
   // Die if parent exits.
   prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
-  std::vector<char*> argv(args.size() + 1);
-  for (size_t i = 0; i < args.size(); ++i) {
-    argv[i] = &args[i][0];
-  }
-  argv[args.size()] = nullptr;
 
   char start_signal = 0;
   ssize_t nread = TEMP_FAILURE_RETRY(read(start_signal_fd, &start_signal, 1));
   if (nread == 1 && start_signal == 1) {
     close(start_signal_fd);
-    execvp(argv[0], argv.data());
-    // If execvp() succeed, we will not arrive here. But if it failed, we need to
-    // report the failure to the parent process by writing 1 to exec_child_fd.
-    int saved_errno = errno;
-    char exec_child_failed = 1;
-    TEMP_FAILURE_RETRY(write(exec_child_fd, &exec_child_failed, 1));
-    close(exec_child_fd);
-    errno = saved_errno;
-    PLOG(ERROR) << "child process failed to execvp(" << argv[0] << ")";
+    if (child_proc_function_) {
+      close(exec_child_fd);
+      child_proc_function_();
+    } else {
+      char* argv[child_proc_args_.size() + 1];
+      for (size_t i = 0; i < child_proc_args_.size(); ++i) {
+        argv[i] = &child_proc_args_[i][0];
+      }
+      argv[child_proc_args_.size()] = nullptr;
+      execvp(argv[0], argv);
+      // If execvp() succeeds, we never get here. But if it fails, we need to
+      // report the failure to the parent process by writing 1 to exec_child_fd.
+      int saved_errno = errno;
+      char exec_child_failed = 1;
+      TEMP_FAILURE_RETRY(write(exec_child_fd, &exec_child_failed, 1));
+      close(exec_child_fd);
+      errno = saved_errno;
+      PLOG(ERROR) << "child process failed to execvp(" << argv[0] << ")";
+    }
   } else {
     PLOG(ERROR) << "child process failed to receive start_signal, nread = " << nread;
   }
diff --git a/simpleperf/workload.h b/simpleperf/workload.h
index fa754b5..2141830 100644
--- a/simpleperf/workload.h
+++ b/simpleperf/workload.h
@@ -19,6 +19,7 @@
 
 #include <sys/types.h>
 #include <chrono>
+#include <functional>
 #include <string>
 #include <vector>
 
@@ -34,6 +35,7 @@
 
  public:
   static std::unique_ptr<Workload> CreateWorkload(const std::vector<std::string>& args);
+  static std::unique_ptr<Workload> CreateWorkload(const std::function<void ()>& function);
 
   ~Workload();
 
@@ -43,19 +45,24 @@
   }
 
  private:
-  explicit Workload(const std::vector<std::string>& args)
+  explicit Workload(const std::vector<std::string>& args,
+                    const std::function<void ()>& function)
       : work_state_(NotYetCreateNewProcess),
-        args_(args),
+        child_proc_args_(args),
+        child_proc_function_(function),
         work_pid_(-1),
         start_signal_fd_(-1),
         exec_child_fd_(-1) {
   }
 
   bool CreateNewProcess();
+  void ChildProcessFn(int start_signal_fd, int exec_child_fd);
   bool WaitChildProcess(bool wait_forever, bool is_child_killed);
 
   WorkState work_state_;
-  std::vector<std::string> args_;
+  // The child process either executes child_proc_args_ or runs child_proc_function_.
+  std::vector<std::string> child_proc_args_;
+  std::function<void ()> child_proc_function_;
   pid_t work_pid_;
   int start_signal_fd_;  // The parent process writes 1 to start workload in the child process.
   int exec_child_fd_;    // The child process writes 1 to notify that execvp() failed.