Merge "simpleperf: update doc for stat --per-core option."
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index adc4f0a..0f8f615 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -257,7 +257,8 @@
w = std::max(w, size);
};
- for (size_t i = 0; i < titles.size(); i++) {
+ // The last title is too long. Don't include it for width adjustment.
+ for (size_t i = 0; i + 1 < titles.size(); i++) {
adjust_width(width[i], titles[i].size());
}
@@ -956,8 +957,8 @@
if (sum.time_running < sum.time_enabled && sum.time_running != 0) {
scale = static_cast<double>(sum.time_enabled) / sum.time_running;
}
- if (system_wide_collection_ && report_per_thread_ && sum.time_running == 0) {
- // No need to report threads not running in system wide per thread report.
+ if ((report_per_thread_ || report_per_core_) && sum.time_running == 0) {
+ // No need to report threads or cpus not running.
return;
}
ThreadInfo* thread = nullptr;
@@ -1032,26 +1033,28 @@
else
fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
+ const char* COUNTER_MULTIPLEX_INFO =
+ "probably caused by hardware counter multiplexing (less counters than events).\n"
+ "Try --use-devfreq-counters if on a rooted device.";
+
if (cpus_ == std::vector<int>(1, -1) ||
event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) {
// We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases,
// if percentages < 100%, probably it is caused by hardware counter multiplexing.
if (!counters_always_available) {
- LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time.\n"
- << "Probably because there are less hardware counters available than events.\n"
- << "Try --use-devfreq-counters if on a rooted device.";
+ LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n"
+ << COUNTER_MULTIPLEX_INFO;
}
+ } else if (report_per_thread_) {
+ // We monitor each thread on each cpu.
+ LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n"
+ << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n"
+ << COUNTER_MULTIPLEX_INFO;
} else {
- // We monitor a thread on a cpu. A percentage represents
- // runtime_of_a_thread_on_a_cpu / runtime_of_a_thread_on_all_cpus. If percentage sum of a
- // thread < 100%, or total event count for a running thread is 0, probably it is caused by
- // hardware counter multiplexing. It is hard to detect the second case, so always print below
- // info.
- LOG(INFO) << "A percentage represents runtime_of_a_thread_on_a_cpu / "
- "runtime_of_a_thread_on_all_cpus.\n"
- << "If percentage sum of a thread < 100%, or total event count for a running\n"
- << "thread is 0, probably because there are less hardware counters available than\n"
- << "events. Try --use-devfreq-counters if on a rooted device.";
+ // We monitor some threads on each cpu.
+ LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n"
+ << "threads. If percentage sum < 99%, or report for an event is missing,\n"
+ << COUNTER_MULTIPLEX_INFO;
}
return true;
}
diff --git a/simpleperf/doc/executable_commands_reference.md b/simpleperf/doc/executable_commands_reference.md
index 22ed52f..5d51a15 100644
--- a/simpleperf/doc/executable_commands_reference.md
+++ b/simpleperf/doc/executable_commands_reference.md
@@ -294,10 +294,26 @@
By default, stat cmd outputs an event count sum for all monitored cpu cores. But when `--per-core`
option is used, stat cmd outputs an event count for each core. It can be used to see how events
are distributed on different cores.
+When stating non-system wide with `--per-core` option, simpleperf creates a perf event for each
+monitored thread on each core. When a thread is in running state, perf events on all cores are
+enabled, but only the perf event on the core running the thread is in running state. So the
+percentage comment shows runtime_on_a_core / runtime_on_all_cores. Note that, percentage is still
+affected by hardware counter multiplexing. Check simpleperf log output for ways to distinguish it.
```sh
# Print event counts for each cpu running threads in process 11904.
+# A percentage shows runtime_on_a_cpu / runtime_on_all_cpus.
$ simpleperf stat --per-core -p 11904 --duration 1
+Performance counter statistics:
+
+# cpu count event_name # percentage = event_run_time / enabled_time
+ 7 56,552,838 cpu-cycles # (60%)
+ 3 25,958,605 cpu-cycles # (20%)
+ 0 22,822,698 cpu-cycles # (15%)
+ 1 6,661,495 cpu-cycles # (5%)
+ 4 1,519,093 cpu-cycles # (0%)
+
+Total test time: 1.001082 seconds.
# Print event counts for each cpu system wide.
$ su 0 simpleperf stat --per-core -a --duration 1