diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index eddccfc..c513e2e 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -85,10 +85,17 @@
   ASSERT_TRUE(RecordCmd()->Run({"-a", "-e", "sched:sched_switch", "sleep", "1"}));
 }
 
+extern bool IsBranchSamplingSupported();  // Defined in event_selection_set.cpp.
+
 TEST(record_cmd, branch_sampling) {
-  ASSERT_TRUE(RecordCmd()->Run({"-a", "-b", "sleep", "1"}));
-  ASSERT_TRUE(RecordCmd()->Run({"-j", "any,any_call,any_ret,ind_call", "sleep", "1"}));
-  ASSERT_TRUE(RecordCmd()->Run({"-j", "any,k", "sleep", "1"}));
-  ASSERT_TRUE(RecordCmd()->Run({"-j", "any,u", "sleep", "1"}));
-  ASSERT_FALSE(RecordCmd()->Run({"-j", "u", "sleep", "1"}));
+  if (!IsBranchSamplingSupported()) {
+    GTEST_LOG_(INFO)
+        << "This test does nothing as branch stack sampling is not supported on this device.";
+    return;
+  }
+  ASSERT_TRUE(RecordCmd()->Run({"-a", "-b", "sleep", "1"}));
+  ASSERT_TRUE(RecordCmd()->Run({"-j", "any,any_call,any_ret,ind_call", "sleep", "1"}));
+  ASSERT_TRUE(RecordCmd()->Run({"-j", "any,k", "sleep", "1"}));
+  ASSERT_TRUE(RecordCmd()->Run({"-j", "any,u", "sleep", "1"}));
+  ASSERT_FALSE(RecordCmd()->Run({"-j", "u", "sleep", "1"}));
 }
diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp
index f0a1ad5..b342ab5 100644
--- a/simpleperf/event_fd.cpp
+++ b/simpleperf/event_fd.cpp
@@ -38,15 +38,18 @@
   return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
 }
 
-std::unique_ptr<EventFd> EventFd::OpenEventFileForProcess(const perf_event_attr& attr, pid_t pid) {
-  return OpenEventFile(attr, pid, -1);
+std::unique_ptr<EventFd> EventFd::OpenEventFileForProcess(const perf_event_attr& attr, pid_t pid,
+                                                          bool report_error) {
+  return OpenEventFile(attr, pid, -1, report_error);  // cpu -1: any cpu, per perf_event_open(2)
 }
 
-std::unique_ptr<EventFd> EventFd::OpenEventFileForCpu(const perf_event_attr& attr, int cpu) {
-  return OpenEventFile(attr, -1, cpu);
+std::unique_ptr<EventFd> EventFd::OpenEventFileForCpu(const perf_event_attr& attr, int cpu,
+                                                      bool report_error) {
+  return OpenEventFile(attr, -1, cpu, report_error);  // pid -1: all processes on this cpu
 }
 
-std::unique_ptr<EventFd> EventFd::OpenEventFile(const perf_event_attr& attr, pid_t pid, int cpu) {
+std::unique_ptr<EventFd> EventFd::OpenEventFile(const perf_event_attr& attr, pid_t pid, int cpu,
+                                                bool report_error) {
   perf_event_attr perf_attr = attr;
   std::string event_name = "unknown event";
   const EventType* event_type =
@@ -56,15 +59,14 @@
   }
   int perf_event_fd = perf_event_open(&perf_attr, pid, cpu, -1, 0);
   if (perf_event_fd == -1) {
-    // It depends whether the perf_event_file configuration is supported by the kernel and the
-    // machine. So fail to open the file is not an error.
-    PLOG(DEBUG) << "open perf_event_file (event " << event_name << ", pid " << pid << ", cpu "
-                << cpu << ") failed";
+    (report_error ? PLOG(ERROR) : PLOG(DEBUG)) << "open perf_event_file (event " << event_name
+                                               << ", pid " << pid << ", cpu " << cpu << ") failed";
     return nullptr;
   }
   if (fcntl(perf_event_fd, F_SETFD, FD_CLOEXEC) == -1) {
-    PLOG(ERROR) << "fcntl(FD_CLOEXEC) for perf_event_file (event " << event_name << ", pid " << pid
-                << ", cpu " << cpu << ") failed";
+    (report_error ? PLOG(ERROR) : PLOG(DEBUG)) << "fcntl(FD_CLOEXEC) for perf_event_file (event "
+                                               << event_name << ", pid " << pid << ", cpu " << cpu
+                                               << ") failed";
     return nullptr;
   }
   return std::unique_ptr<EventFd>(new EventFd(perf_event_fd, event_name, pid, cpu));
diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h
index 36ea0cb..e05761e 100644
--- a/simpleperf/event_fd.h
+++ b/simpleperf/event_fd.h
@@ -37,9 +37,12 @@
 // EventFd represents an opened perf_event_file.
 class EventFd {
  public:
-  static std::unique_ptr<EventFd> OpenEventFileForProcess(const perf_event_attr& attr, pid_t pid);
-  static std::unique_ptr<EventFd> OpenEventFileForCpu(const perf_event_attr& attr, int cpu);
-  static std::unique_ptr<EventFd> OpenEventFile(const perf_event_attr& attr, pid_t pid, int cpu);
+  static std::unique_ptr<EventFd> OpenEventFileForProcess(const perf_event_attr& attr, pid_t pid,
+                                                          bool report_error = true);
+  static std::unique_ptr<EventFd> OpenEventFileForCpu(const perf_event_attr& attr, int cpu,
+                                                      bool report_error = true);
+  static std::unique_ptr<EventFd> OpenEventFile(const perf_event_attr& attr, pid_t pid, int cpu,
+                                                bool report_error = true);
 
   ~EventFd();
 
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index 6f14b80..644938c 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -22,6 +22,18 @@
 #include "event_attr.h"
 #include "event_type.h"
 
+// True if a cpu-cycles event file with branch stack sampling opens for the current process.
+bool IsBranchSamplingSupported() {
+  const EventType* cycles = EventTypeFactory::FindEventTypeByName("cpu-cycles", false);
+  if (cycles == nullptr) {
+    return false;
+  }
+  perf_event_attr probe_attr = CreateDefaultPerfEventAttr(*cycles);
+  probe_attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+  probe_attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
+  return EventFd::OpenEventFileForProcess(probe_attr, getpid(), false) != nullptr;
+}
+
 void EventSelectionSet::AddEventType(const EventType& event_type) {
   EventSelection selection;
   selection.event_type = &event_type;
@@ -64,6 +76,10 @@
     LOG(ERROR) << "Invalid branch_sample_type: 0x" << std::hex << branch_sample_type;
     return false;
   }
+  if (branch_sample_type != 0 && !IsBranchSamplingSupported()) {
+    LOG(ERROR) << "branch stack sampling is not supported on this device.";
+    return false;
+  }
   for (auto& selection : selections_) {
     perf_event_attr& attr = selection.event_attr;
     if (branch_sample_type != 0) {
@@ -103,8 +119,6 @@
   for (auto& selection : selections_) {
     auto event_fd = EventFd::OpenEventFileForProcess(selection.event_attr, pid);
     if (event_fd == nullptr) {
-      PLOG(ERROR) << "failed to open perf event file for event type " << selection.event_type->name
-                  << " on pid " << pid;
       return false;
     }
     selection.event_fds.push_back(std::move(event_fd));
diff --git a/simpleperf/event_type.cpp b/simpleperf/event_type.cpp
index a3b8fd2..e4c50e1 100644
--- a/simpleperf/event_type.cpp
+++ b/simpleperf/event_type.cpp
@@ -37,7 +37,8 @@
 };
 
 static bool IsEventTypeSupportedByKernel(const EventType& event_type) {
-  auto event_fd = EventFd::OpenEventFileForProcess(CreateDefaultPerfEventAttr(event_type), getpid());
+  const perf_event_attr attr = CreateDefaultPerfEventAttr(event_type);
+  auto event_fd = EventFd::OpenEventFileForProcess(attr, getpid(), false);  // false: log at DEBUG
   return event_fd != nullptr;
 }
 
diff --git a/simpleperf/event_type_table.h b/simpleperf/event_type_table.h
index 895cc85..a77be0a 100644
--- a/simpleperf/event_type_table.h
+++ b/simpleperf/event_type_table.h
@@ -20,43 +20,43 @@
 {"alignment-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS},
 {"emulation-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS},
 
-{"L1-dcache-loades", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-dcache-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"L1-dcache-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"L1-dcache-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"L1-dcache-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"L1-dcache-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"L1-dcache-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
-{"L1-icache-loades", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"L1-icache-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"L1-icache-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"L1-icache-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"L1-icache-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"L1-icache-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"L1-icache-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
-{"LLC-loades", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"LLC-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"LLC-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"LLC-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"LLC-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"LLC-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"LLC-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
-{"dTLB-loades", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"dTLB-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"dTLB-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"dTLB-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"dTLB-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"dTLB-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"dTLB-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_DTLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
-{"iTLB-loades", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"iTLB-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"iTLB-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"iTLB-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"iTLB-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"iTLB-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"iTLB-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_ITLB) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
-{"branch-loades", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"branch-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"branch-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"branch-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"branch-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"branch-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"branch-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
-{"node-loades", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
+{"node-loads", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"node-load-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
 {"node-stores", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
 {"node-store-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
diff --git a/simpleperf/generate_event_type_table.py b/simpleperf/generate_event_type_table.py
index b3fb897..9fbd278 100755
--- a/simpleperf/generate_event_type_table.py
+++ b/simpleperf/generate_event_type_table.py
@@ -81,7 +81,7 @@
                       ["branch", "PERF_COUNT_HW_CACHE_BPU"],
                       ["node", "PERF_COUNT_HW_CACHE_NODE"],
                       ]
-    hw_cache_ops = [["loades", "load", "PERF_COUNT_HW_CACHE_OP_READ"],
+    hw_cache_ops = [["loads", "load", "PERF_COUNT_HW_CACHE_OP_READ"],
                     ["stores", "store", "PERF_COUNT_HW_CACHE_OP_WRITE"],
                     ["prefetches", "prefetch",
                      "PERF_COUNT_HW_CACHE_OP_PREFETCH"],
diff --git a/simpleperf/workload.cpp b/simpleperf/workload.cpp
index 9d395cb..2364b1c 100644
--- a/simpleperf/workload.cpp
+++ b/simpleperf/workload.cpp
@@ -31,6 +31,18 @@
   return nullptr;
 }
 
+// Sends SIGKILL to the workload process if one was created and its state is not Finished, then
+// closes the two fds this object holds. NOTE(review): there is no waitpid() here, so if work_pid_
+// is a child of this process it may remain a zombie until reaped elsewhere -- confirm intended.
+Workload::~Workload() {
+  const bool process_created = work_pid_ != -1 && work_state_ != NotYetCreateNewProcess;
+  if (process_created && work_state_ != Finished) {
+    kill(work_pid_, SIGKILL);
+  }
+  if (start_signal_fd_ != -1) close(start_signal_fd_);
+  if (exec_child_fd_ != -1) close(exec_child_fd_);
+}
+
 static void ChildProcessFn(std::vector<std::string>& args, int start_signal_fd, int exec_child_fd);
 
 bool Workload::CreateNewProcess() {
diff --git a/simpleperf/workload.h b/simpleperf/workload.h
index 57622c8..9e2f0db 100644
--- a/simpleperf/workload.h
+++ b/simpleperf/workload.h
@@ -36,14 +36,7 @@
  public:
   static std::unique_ptr<Workload> CreateWorkload(const std::vector<std::string>& args);
 
-  ~Workload() {
-    if (start_signal_fd_ != -1) {
-      close(start_signal_fd_);
-    }
-    if (exec_child_fd_ != -1) {
-      close(exec_child_fd_);
-    }
-  }
+  ~Workload();
 
   bool Start();
   bool IsFinished();
