Simpleperf: support callgraph option in report command.

Bug: 19483574
Change-Id: I7c5558a71ea15650c1d3e7afa268b3fbc11543d7
diff --git a/simpleperf/Android.mk b/simpleperf/Android.mk
index 278bc24..b923738 100644
--- a/simpleperf/Android.mk
+++ b/simpleperf/Android.mk
@@ -32,6 +32,7 @@
 # libsimpleperf
 # =========================================================
 libsimpleperf_common_src_files := \
+  callchain.cpp \
   cmd_dumprecord.cpp \
   cmd_help.cpp \
   cmd_report.cpp \
diff --git a/simpleperf/callchain.cpp b/simpleperf/callchain.cpp
new file mode 100644
index 0000000..c914d1e
--- /dev/null
+++ b/simpleperf/callchain.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "callchain.h"
+
+#include <queue>
+#include <base/logging.h>
+#include "sample_tree.h"
+
+static bool MatchSampleByName(const SampleEntry* sample1, const SampleEntry* sample2) {
+  return (sample1->symbol->name == sample2->symbol->name);
+}
+
+static size_t GetMatchingLengthInNode(const CallChainNode* node,
+                                      const std::vector<SampleEntry*>& chain, size_t chain_start) {
+  size_t i, j;
+  for (i = 0, j = chain_start; i < node->chain.size() && j < chain.size(); ++i, ++j) {
+    if (!MatchSampleByName(node->chain[i], chain[j])) {
+      break;
+    }
+  }
+  return i;
+}
+
+static CallChainNode* FindMatchingNode(const std::vector<std::unique_ptr<CallChainNode>>& nodes,
+                                       const SampleEntry* sample) {
+  for (auto& node : nodes) {
+    if (MatchSampleByName(node->chain.front(), sample)) {
+      return node.get();
+    }
+  }
+  return nullptr;
+}
+
+static std::unique_ptr<CallChainNode> AllocateNode(const std::vector<SampleEntry*>& chain,
+                                                   size_t chain_start, uint64_t period,
+                                                   uint64_t children_period) {
+  std::unique_ptr<CallChainNode> node(new CallChainNode);
+  for (size_t i = chain_start; i < chain.size(); ++i) {
+    node->chain.push_back(chain[i]);
+  }
+  node->period = period;
+  node->children_period = children_period;
+  return node;
+}
+
+static void SplitNode(CallChainNode* parent, size_t parent_length) {
+  std::unique_ptr<CallChainNode> child =
+      AllocateNode(parent->chain, parent_length, parent->period, parent->children_period);
+  child->children = std::move(parent->children);
+  parent->period = 0;
+  parent->children_period = child->period + child->children_period;
+  parent->chain.resize(parent_length);
+  parent->children.clear();
+  parent->children.push_back(std::move(child));
+}
+
+void CallChainRoot::AddCallChain(const std::vector<SampleEntry*>& callchain, uint64_t period) {
+  children_period += period;
+  CallChainNode* p = FindMatchingNode(children, callchain[0]);
+  if (p == nullptr) {
+    std::unique_ptr<CallChainNode> new_node = AllocateNode(callchain, 0, period, 0);
+    children.push_back(std::move(new_node));
+    return;
+  }
+  size_t callchain_pos = 0;
+  while (true) {
+    size_t match_length = GetMatchingLengthInNode(p, callchain, callchain_pos);
+    CHECK_GT(match_length, 0u);
+    callchain_pos += match_length;
+    bool find_child = true;
+    if (match_length < p->chain.size()) {
+      SplitNode(p, match_length);
+      find_child = false;  // No need to find matching node in p->children.
+    }
+    if (callchain_pos == callchain.size()) {
+      p->period += period;
+      return;
+    }
+    p->children_period += period;
+    if (find_child) {
+      CallChainNode* np = FindMatchingNode(p->children, callchain[callchain_pos]);
+      if (np != nullptr) {
+        p = np;
+        continue;
+      }
+    }
+    std::unique_ptr<CallChainNode> new_node = AllocateNode(callchain, callchain_pos, period, 0);
+    p->children.push_back(std::move(new_node));
+    break;
+  }
+}
+
+static bool CompareNodeByPeriod(const std::unique_ptr<CallChainNode>& n1,
+                                const std::unique_ptr<CallChainNode>& n2) {
+  uint64_t period1 = n1->period + n1->children_period;
+  uint64_t period2 = n2->period + n2->children_period;
+  return period1 > period2;
+}
+
+void CallChainRoot::SortByPeriod() {
+  std::queue<std::vector<std::unique_ptr<CallChainNode>>*> queue;
+  queue.push(&children);
+  while (!queue.empty()) {
+    std::vector<std::unique_ptr<CallChainNode>>* v = queue.front();
+    queue.pop();
+    std::sort(v->begin(), v->end(), CompareNodeByPeriod);
+    for (auto& node : *v) {
+      queue.push(&node->children);
+    }
+  }
+}
diff --git a/simpleperf/callchain.h b/simpleperf/callchain.h
new file mode 100644
index 0000000..4b8a9d4
--- /dev/null
+++ b/simpleperf/callchain.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_CALLCHAIN_H_
+#define SIMPLE_PERF_CALLCHAIN_H_
+
+#include <memory>
+#include <vector>
+
+struct SampleEntry;
+
+struct CallChainNode {
+  uint64_t period;
+  uint64_t children_period;
+  std::vector<SampleEntry*> chain;
+  std::vector<std::unique_ptr<CallChainNode>> children;
+};
+
+struct CallChainRoot {
+  uint64_t children_period;
+  std::vector<std::unique_ptr<CallChainNode>> children;
+
+  CallChainRoot() : children_period(0) {
+  }
+
+  void AddCallChain(const std::vector<SampleEntry*>& callchain, uint64_t period);
+  void SortByPeriod();
+};
+
+#endif  // SIMPLE_PERF_CALLCHAIN_H_
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
index 87479bd..0e8f8e6 100644
--- a/simpleperf/cmd_report.cpp
+++ b/simpleperf/cmd_report.cpp
@@ -238,6 +238,7 @@
             "                  the instruction addresses. Only valid for perf.data recorded with\n"
             "                  -b/-j option.\n"
             "    --children    Print the overhead accumulated by appearing in the callchain.\n"
+            "    -g            Print call graph.\n"
             "    -i <file>     Specify path of record file, default is perf.data.\n"
             "    -n            Print the sample count for each item.\n"
             "    --no-demangle        Don't demangle symbol names.\n"
@@ -249,7 +250,8 @@
             "    --symfs <dir>  Look for files with symbols relative to this directory.\n"),
         record_filename_("perf.data"),
         use_branch_address_(false),
-        accumulate_callchain_(false) {
+        accumulate_callchain_(false),
+        print_callgraph_(false) {
     compare_sample_func_t compare_sample_callback = std::bind(
         &ReportCommand::CompareSampleEntry, this, std::placeholders::_1, std::placeholders::_2);
     sample_tree_ = std::unique_ptr<SampleTree>(new SampleTree(compare_sample_callback));
@@ -270,6 +272,7 @@
   void CollectReportEntryWidth(const SampleEntry& sample);
   void PrintReportHeader();
   void PrintReportEntry(const SampleEntry& sample);
+  void PrintCallGraph(const SampleEntry& sample);
 
   std::string record_filename_;
   std::unique_ptr<RecordFileReader> record_file_reader_;
@@ -280,6 +283,7 @@
   bool use_branch_address_;
   std::string record_cmdline_;
   bool accumulate_callchain_;
+  bool print_callgraph_;
 };
 
 bool ReportCommand::Run(const std::vector<std::string>& args) {
@@ -313,6 +317,9 @@
       use_branch_address_ = true;
     } else if (args[i] == "--children") {
       accumulate_callchain_ = true;
+    } else if (args[i] == "-g") {
+      print_callgraph_ = true;
+      accumulate_callchain_ = true;
     } else if (args[i] == "-i") {
       if (!NextArgumentOrError(args, &i)) {
         return false;
@@ -471,6 +478,7 @@
       std::vector<SampleEntry*> callchain;
       callchain.push_back(sample);
       const std::vector<uint64_t>& ips = r.callchain_data.ips;
+      bool first_ip = true;
       for (auto& ip : ips) {
         if (ip >= PERF_CONTEXT_MAX) {
           switch (ip) {
@@ -484,12 +492,32 @@
               LOG(ERROR) << "Unexpected perf_context in callchain: " << ip;
           }
         } else {
-          sample =
+          if (first_ip) {
+            // Remove duplication with sampled ip.
+            if (ip == r.ip_data.ip) {
+              continue;
+            }
+            first_ip = false;
+          }
+          SampleEntry* sample =
               sample_tree_->AddCallChainSample(r.tid_data.pid, r.tid_data.tid, ip, r.time_data.time,
                                                r.period_data.period, in_kernel, callchain);
           callchain.push_back(sample);
         }
       }
+      if (print_callgraph_) {
+        std::set<SampleEntry*> added_set;
+        while (callchain.size() >= 2) {
+          SampleEntry* sample = callchain[0];
+          callchain.erase(callchain.begin());
+          // Add only once for recursive calls on callchain.
+          if (added_set.find(sample) != added_set.end()) {
+            continue;
+          }
+          added_set.insert(sample);
+          sample_tree_->InsertCallChainForSample(sample, callchain, r.period_data.period);
+        }
+      }
     }
   }
 }
@@ -568,6 +596,49 @@
       printf("%s\n", item->Show(sample).c_str());
     }
   }
+  if (print_callgraph_) {
+    PrintCallGraph(sample);
+  }
+}
+
+static void PrintCallGraphEntry(size_t depth, std::string prefix,
+                                const std::unique_ptr<CallChainNode>& node, uint64_t parent_period,
+                                bool last) {
+  if (depth > 20) {
+    LOG(WARNING) << "truncated callgraph at depth " << depth;
+    return;
+  }
+  prefix += "|";
+  printf("%s\n", prefix.c_str());
+  if (last) {
+    prefix.back() = ' ';
+  }
+  std::string percentage_s = "-- ";
+  if (node->period + node->children_period != parent_period) {
+    double percentage = 100.0 * (node->period + node->children_period) / parent_period;
+    percentage_s = android::base::StringPrintf("--%.2lf%%-- ", percentage);
+  }
+  printf("%s%s%s\n", prefix.c_str(), percentage_s.c_str(), node->chain[0]->symbol->name.c_str());
+  prefix.append(percentage_s.size(), ' ');
+  for (size_t i = 1; i < node->chain.size(); ++i) {
+    printf("%s%s\n", prefix.c_str(), node->chain[i]->symbol->name.c_str());
+  }
+
+  for (size_t i = 0; i < node->children.size(); ++i) {
+    PrintCallGraphEntry(depth + 1, prefix, node->children[i], node->children_period,
+                        (i + 1 == node->children.size()));
+  }
+}
+
+void ReportCommand::PrintCallGraph(const SampleEntry& sample) {
+  std::string prefix = "       ";
+  printf("%s|\n", prefix.c_str());
+  printf("%s-- %s\n", prefix.c_str(), sample.symbol->name.c_str());
+  prefix.append(3, ' ');
+  for (size_t i = 0; i < sample.callchain.children.size(); ++i) {
+    PrintCallGraphEntry(1, prefix, sample.callchain.children[i], sample.callchain.children_period,
+                        (i + 1 == sample.callchain.children.size()));
+  }
 }
 
 __attribute__((constructor)) static void RegisterReportCommand() {
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
index 236673f..a0dc596 100644
--- a/simpleperf/cmd_report_test.cpp
+++ b/simpleperf/cmd_report_test.cpp
@@ -31,6 +31,7 @@
   static void SetUpTestCase() {
     ASSERT_TRUE(RecordCmd()->Run({"-a", "sleep", "1"}));
     ASSERT_TRUE(RecordCmd()->Run({"-a", "-o", "perf2.data", "sleep", "1"}));
+    ASSERT_TRUE(RecordCmd()->Run({"-g", "-o", "perf_g.data", "sleep", "1"}));
   }
 };
 
@@ -50,6 +51,14 @@
   ASSERT_TRUE(ReportCmd()->Run({"--sort", "comm,pid,dso,symbol"}));
 }
 
+TEST_F(ReportCommandTest, children_option) {
+  ASSERT_TRUE(ReportCmd()->Run({"--children", "-i", "perf_g.data"}));
+}
+
+TEST_F(ReportCommandTest, callgraph_option) {
+  ASSERT_TRUE(ReportCmd()->Run({"-g", "-i", "perf_g.data"}));
+}
+
 extern bool IsBranchSamplingSupported();
 
 TEST(report_cmd, use_branch_address) {
@@ -62,8 +71,3 @@
         << "This test does nothing as branch stack sampling is not supported on this device.";
   }
 }
-
-TEST(report_cmd, children_option) {
-  ASSERT_TRUE(RecordCmd()->Run({"-g", "sleep", "1"}));
-  ASSERT_TRUE(ReportCmd()->Run({"--children"}));
-}
diff --git a/simpleperf/sample_tree.cpp b/simpleperf/sample_tree.cpp
index d07a8d2..3f0e5b3 100644
--- a/simpleperf/sample_tree.cpp
+++ b/simpleperf/sample_tree.cpp
@@ -166,20 +166,8 @@
   const MapEntry* map = FindMap(thread, ip, in_kernel);
   const SymbolEntry* symbol = FindSymbol(map, ip);
 
-  SampleEntry value = {
-      ip, time, period,
-      0,  // accumulated_period
-      1,  // sample_count
-      thread,
-      thread->comm,  // thead_comm
-      map, symbol,
-      BranchFromEntry{
-          0,        // ip
-          nullptr,  // map
-          nullptr,  // symbol
-          0,        // flags
-      },
-  };
+  SampleEntry value(ip, time, period, 0, 1, thread, map, symbol);
+
   return InsertSample(value);
 }
 
@@ -197,20 +185,12 @@
   }
   const SymbolEntry* to_symbol = FindSymbol(to_map, to_ip);
 
-  SampleEntry value = {to_ip,  // ip
-                       time, period,
-                       0,  // accumulated_period
-                       1,  // sample_count
-                       thread,
-                       thread->comm,  // thread_comm
-                       to_map,        // map
-                       to_symbol,     // symbol
-                       BranchFromEntry{
-                           from_ip,       // ip
-                           from_map,      // map
-                           from_symbol,   // symbol
-                           branch_flags,  // flags
-                       }};
+  SampleEntry value(to_ip, time, period, 0, 1, thread, to_map, to_symbol);
+  value.branch_from.ip = from_ip;
+  value.branch_from.map = from_map;
+  value.branch_from.symbol = from_symbol;
+  value.branch_from.flags = branch_flags;
+
   InsertSample(value);
 }
 
@@ -221,21 +201,8 @@
   const MapEntry* map = FindMap(thread, ip, in_kernel);
   const SymbolEntry* symbol = FindSymbol(map, ip);
 
-  SampleEntry value = {
-      ip, time,
-      0,       // period
-      period,  // accumulated_period
-      0,       // sample_count
-      thread,
-      thread->comm,  // thread_comm
-      map, symbol,
-      BranchFromEntry{
-          0,        // ip
-          nullptr,  // map
-          nullptr,  // symbol
-          0,        // flags
-      },
-  };
+  SampleEntry value(ip, time, 0, period, 0, thread, map, symbol);
+
   auto it = sample_tree_.find(&value);
   if (it != sample_tree_.end()) {
     SampleEntry* sample = *it;
@@ -265,8 +232,8 @@
   return result;
 }
 
-SampleEntry* SampleTree::AllocateSample(const SampleEntry& value) {
-  SampleEntry* sample = new SampleEntry(value);
+SampleEntry* SampleTree::AllocateSample(SampleEntry& value) {
+  SampleEntry* sample = new SampleEntry(std::move(value));
   sample_storage_.push_back(std::unique_ptr<SampleEntry>(sample));
   return sample;
 }
@@ -285,10 +252,17 @@
   return symbol;
 }
 
+void SampleTree::InsertCallChainForSample(SampleEntry* sample,
+                                          const std::vector<SampleEntry*>& callchain,
+                                          uint64_t period) {
+  sample->callchain.AddCallChain(callchain, period);
+}
+
 void SampleTree::VisitAllSamples(std::function<void(const SampleEntry&)> callback) {
   if (sorted_sample_tree_.size() != sample_tree_.size()) {
     sorted_sample_tree_.clear();
     for (auto& sample : sample_tree_) {
+      sample->callchain.SortByPeriod();
       sorted_sample_tree_.insert(sample);
     }
   }
diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h
index 0be8286..2e97ceb 100644
--- a/simpleperf/sample_tree.h
+++ b/simpleperf/sample_tree.h
@@ -24,6 +24,7 @@
 #include <unordered_map>
 #include <vector>
 
+#include "callchain.h"
 #include "dso.h"
 
 struct MapEntry {
@@ -50,6 +51,9 @@
   const MapEntry* map;
   const SymbolEntry* symbol;
   uint64_t flags;
+
+  BranchFromEntry() : ip(0), map(nullptr), symbol(nullptr), flags(0) {
+  }
 };
 
 struct SampleEntry {
@@ -63,6 +67,25 @@
   const MapEntry* map;
   const SymbolEntry* symbol;
   BranchFromEntry branch_from;
+  CallChainRoot callchain;  // A callchain tree representing all callchains in the sample records.
+
+  SampleEntry(uint64_t ip, uint64_t time, uint64_t period, uint64_t accumulated_period,
+              uint64_t sample_count, const ThreadEntry* thread, const MapEntry* map,
+              const SymbolEntry* symbol)
+      : ip(ip),
+        time(time),
+        period(period),
+        accumulated_period(accumulated_period),
+        sample_count(sample_count),
+        thread(thread),
+        thread_comm(thread->comm),
+        map(map),
+        symbol(symbol) {
+  }
+
+  // The data member 'callchain' can only move, not copy.
+  SampleEntry(SampleEntry&&) = default;
+  SampleEntry(SampleEntry&) = delete;
 };
 
 typedef std::function<int(const SampleEntry&, const SampleEntry&)> compare_sample_func_t;
@@ -103,6 +126,8 @@
                        uint64_t time, uint64_t period);
   SampleEntry* AddCallChainSample(int pid, int tid, uint64_t ip, uint64_t time, uint64_t period,
                                   bool in_kernel, const std::vector<SampleEntry*>& callchain);
+  void InsertCallChainForSample(SampleEntry* sample, const std::vector<SampleEntry*>& callchain,
+                                uint64_t period);
   void VisitAllSamples(std::function<void(const SampleEntry&)> callback);
 
   uint64_t TotalSamples() const {
@@ -120,7 +145,7 @@
   DsoEntry* FindUserDsoOrNew(const std::string& filename);
   const SymbolEntry* FindSymbol(const MapEntry* map, uint64_t ip);
   SampleEntry* InsertSample(SampleEntry& value);
-  SampleEntry* AllocateSample(const SampleEntry& value);
+  SampleEntry* AllocateSample(SampleEntry& value);
 
   struct SampleComparator {
     bool operator()(SampleEntry* sample1, SampleEntry* sample2) const {