Combine leaks with the same stack trace

Combine similar leaks (those with identical stack traces) into a single
leak report, and sort the resulting leaks by total leak size across all
similar leaks and their references.
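
A minimal standalone sketch of the combining scheme (simplified types
and hypothetical names, not this patch's actual code, which keys on the
fixed-size Leak::Backtrace and writes into a reserved allocator::vector):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>
    #include <vector>

    // Simplified stand-in for the patch's Leak struct.
    struct Leak {
      std::vector<uintptr_t> backtrace;
      size_t size = 0;
      size_t similar_count = 0;
      size_t total_size = 0;
    };

    // boost::hash_combine-style hash over the backtrace frames.
    struct BacktraceHash {
      size_t operator()(const std::vector<uintptr_t>& frames) const {
        size_t seed = frames.size();
        for (uintptr_t f : frames) {
          seed ^= std::hash<uintptr_t>()(f) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        }
        return seed;
      }
    };

    // Combine leaks with identical backtraces, then sort by combined size.
    std::vector<Leak> CombineLeaks(const std::vector<Leak>& leaks) {
      std::vector<Leak> combined;
      std::unordered_map<std::vector<uintptr_t>, size_t, BacktraceHash> seen;
      for (const Leak& leak : leaks) {
        // Map each backtrace to the index of its first occurrence in combined.
        auto inserted = seen.emplace(leak.backtrace, combined.size());
        if (inserted.second) {
          combined.push_back(leak);
          combined.back().total_size = leak.size;
        } else {
          Leak& first = combined[inserted.first->second];
          first.similar_count++;
          first.total_size += leak.size;
        }
      }
      std::sort(combined.begin(), combined.end(),
                [](const Leak& a, const Leak& b) { return a.total_size > b.total_size; });
      return combined;
    }

Keying the map on an index into combined (rather than a Leak*) sidesteps
pointer invalidation on growth; the patch instead stores Leak* and calls
reserve() up front to pin the backing memory.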

Bug: 27208635
Change-Id: Ia2bf2ccf3fcbc110d1c7ba60e3b77348d1c63d8d
(cherry picked from commit 7a22e81c20e9a28b9cf7b99e0f46659a2b2a9de7)
diff --git a/libmemunreachable/Allocator.h b/libmemunreachable/Allocator.h
index 3b5504d..a8f579e 100644
--- a/libmemunreachable/Allocator.h
+++ b/libmemunreachable/Allocator.h
@@ -24,6 +24,7 @@
 #include <map>
 #include <memory>
 #include <set>
+#include <unordered_map>
 #include <unordered_set>
 #include <vector>
 extern std::atomic<int> heap_count;
@@ -212,6 +213,10 @@
 template<class Key, class T, class Compare = std::less<Key>>
 using map = std::map<Key, T, Compare, Allocator<std::pair<const Key, T>>>;
 
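+// Allocator-backed version of std::unordered_map, matching the other aliases here.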
+template<class Key, class T, class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>>
+using unordered_map = std::unordered_map<Key, T, Hash, KeyEqual, Allocator<std::pair<const Key, T>>>;
+
 template<class Key, class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>>
 using unordered_set = std::unordered_set<Key, Hash, KeyEqual, Allocator<Key>>;
 
diff --git a/libmemunreachable/Leak.h b/libmemunreachable/Leak.h
new file mode 100644
index 0000000..eaeeea7
--- /dev/null
+++ b/libmemunreachable/Leak.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBMEMUNREACHABLE_LEAK_H_
+#define LIBMEMUNREACHABLE_LEAK_H_
+
+#include <string.h>
+
+#include <functional>
+#include <vector>
+
+#include "memunreachable/memunreachable.h"
+
+// Custom std::hash specialization so that Leak::Backtrace can be used
+// as a key in std::unordered_map.
+namespace std {
+
+template<>
+struct hash<Leak::Backtrace> {
+  std::size_t operator()(const Leak::Backtrace& key) const {
+    std::size_t seed = 0;
+
+    hash_combine(seed, key.num_frames);
+    for (size_t i = 0; i < key.num_frames; i++) {
+      hash_combine(seed, key.frames[i]);
+    }
+
+    return seed;
+  }
+
+ private:
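+  // Mixes v into the running seed, following the boost::hash_combine recipe.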
+  template<typename T>
+  inline void hash_combine(std::size_t& seed, const T& v) const {
+    std::hash<T> hasher;
+    seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+  }
+};
+
+}  // namespace std
+
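+// Frame-wise equality; the default std::equal_to used by unordered_map relies on this.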
+static bool operator==(const Leak::Backtrace& lhs, const Leak::Backtrace& rhs) {
+  return (lhs.num_frames == rhs.num_frames) &&
+      memcmp(lhs.frames, rhs.frames, lhs.num_frames * sizeof(lhs.frames[0])) == 0;
+}
+
+#endif
diff --git a/libmemunreachable/LeakFolding.cpp b/libmemunreachable/LeakFolding.cpp
index 0b4e7dd..be4d20c 100644
--- a/libmemunreachable/LeakFolding.cpp
+++ b/libmemunreachable/LeakFolding.cpp
@@ -111,7 +111,7 @@
 }
 
 bool LeakFolding::Leaked(allocator::vector<LeakFolding::Leak>& leaked,
-    size_t limit, size_t* num_leaks_out, size_t* leak_bytes_out) {
+    size_t* num_leaks_out, size_t* leak_bytes_out) {
   size_t num_leaks = 0;
   size_t leak_bytes = 0;
   for (auto& it : leak_map_) {
@@ -120,15 +120,12 @@
     leak_bytes += leak.range.size();
   }
 
-  size_t n = 0;
   for (auto& it : leak_map_) {
     const LeakInfo& leak = it.second;
     if (leak.scc->dominator) {
-      if (n++ < limit) {
-        leaked.emplace_back(Leak{leak.range,
-          leak.scc->cuumulative_count - 1,
-          leak.scc->cuumulative_size - leak.range.size()});
-      }
+      leaked.emplace_back(Leak{leak.range,
+        leak.scc->cuumulative_count - 1,
+        leak.scc->cuumulative_size - leak.range.size()});
     }
   }
 
diff --git a/libmemunreachable/LeakFolding.h b/libmemunreachable/LeakFolding.h
index 65e9916..732d3f2 100644
--- a/libmemunreachable/LeakFolding.h
+++ b/libmemunreachable/LeakFolding.h
@@ -33,7 +33,7 @@
     size_t referenced_size;
   };
 
-  bool Leaked(allocator::vector<Leak>& leaked, size_t limit,
+  bool Leaked(allocator::vector<Leak>& leaked,
       size_t* num_leaks_out, size_t* leak_bytes_out);
 
  private:
diff --git a/libmemunreachable/MemUnreachable.cpp b/libmemunreachable/MemUnreachable.cpp
index 7e15e11..a8be855 100644
--- a/libmemunreachable/MemUnreachable.cpp
+++ b/libmemunreachable/MemUnreachable.cpp
@@ -21,12 +21,14 @@
 #include <mutex>
 #include <string>
 #include <sstream>
+#include <unordered_map>
 
 #include <backtrace.h>
 #include <android-base/macros.h>
 
 #include "Allocator.h"
 #include "HeapWalker.h"
+#include "Leak.h"
 #include "LeakFolding.h"
 #include "LeakPipe.h"
 #include "ProcessMappings.h"
@@ -118,8 +120,8 @@
   return true;
 }
 
-bool MemUnreachable::GetUnreachableMemory(allocator::vector<Leak>& leaks, size_t limit,
-    size_t* num_leaks, size_t* leak_bytes) {
+bool MemUnreachable::GetUnreachableMemory(allocator::vector<Leak>& leaks,
+    size_t limit, size_t* num_leaks, size_t* leak_bytes) {
   ALOGI("sweeping process %d for unreachable memory", pid_);
   leaks.clear();
 
@@ -127,6 +129,14 @@
     return false;
   }
 
+
+  allocator::vector<Range> leaked1{allocator_};
+  heap_walker_.Leaked(leaked1, 0, num_leaks, leak_bytes);
+
+  ALOGI("sweeping done");
+
+  ALOGI("folding related leaks");
+
   LeakFolding folding(allocator_, heap_walker_);
   if (!folding.FoldLeaks()) {
     return false;
@@ -134,27 +144,61 @@
 
   allocator::vector<LeakFolding::Leak> leaked{allocator_};
 
-  if (!folding.Leaked(leaked, limit, num_leaks, leak_bytes)) {
+  if (!folding.Leaked(leaked, num_leaks, leak_bytes)) {
     return false;
   }
 
-  for (auto it = leaked.begin(); it != leaked.end(); it++) {
-    Leak leak{};
-    leak.begin = it->range.begin;
-    leak.size = it->range.size();
-    leak.referenced_count = it->referenced_count;
-    leak.referenced_size = it->referenced_size;
-    memcpy(leak.contents, reinterpret_cast<void*>(it->range.begin),
-        std::min(leak.size, Leak::contents_length));
-    ssize_t num_backtrace_frames = malloc_backtrace(reinterpret_cast<void*>(it->range.begin),
-        leak.backtrace_frames, leak.backtrace_length);
+  allocator::unordered_map<Leak::Backtrace, Leak*> backtrace_map{allocator_};
+
+  // Prevent reallocations of backing memory so we can store pointers into it
+  // in backtrace_map.
+  leaks.reserve(leaked.size());
+
+  for (auto& it : leaked) {
+    leaks.emplace_back();
+    Leak* leak = &leaks.back();
+
+    ssize_t num_backtrace_frames = malloc_backtrace(reinterpret_cast<void*>(it.range.begin),
+        leak->backtrace.frames, leak->backtrace.max_frames);
     if (num_backtrace_frames > 0) {
-      leak.num_backtrace_frames = num_backtrace_frames;
+      leak->backtrace.num_frames = num_backtrace_frames;
+
+      auto inserted = backtrace_map.emplace(leak->backtrace, leak);
+      if (!inserted.second) {
+        // Leak with same backtrace already exists, drop this one and
+        // increment similar counts on the existing one.
+        leaks.pop_back();
+        Leak* similar_leak = inserted.first->second;
+        similar_leak->similar_count++;
+        similar_leak->similar_size += it.range.size();
+        similar_leak->similar_referenced_count += it.referenced_count;
+        similar_leak->similar_referenced_size += it.referenced_size;
+        similar_leak->total_size += it.range.size();
+        similar_leak->total_size += it.referenced_size;
+        continue;
+      }
     }
-    leaks.emplace_back(leak);
+
+    leak->begin = it.range.begin;
+    leak->size = it.range.size();
+    leak->referenced_count = it.referenced_count;
+    leak->referenced_size = it.referenced_size;
+    leak->total_size = leak->size + leak->referenced_size;
+    memcpy(leak->contents, reinterpret_cast<void*>(it.range.begin),
+        std::min(leak->size, Leak::contents_length));
   }
 
-  ALOGI("sweeping done");
+  ALOGI("folding done");
+
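+  // Report the largest combined (leak + referenced + similar) allocations first.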
+  std::sort(leaks.begin(), leaks.end(), [](const Leak& a, const Leak& b) {
+    return a.total_size > b.total_size;
+  });
+
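+  // Apply the caller's limit only after combining and sorting, so the largest leaks are kept.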
+  if (leaks.size() > limit) {
+    leaks.resize(limit);
+  }
 
   return true;
 }
@@ -216,6 +260,11 @@
   return true;
 }
 
+template<typename T>
+static inline const char* plural(T val) {
+  return (val == 1) ? "" : "s";
+}
+
 bool GetUnreachableMemory(UnreachableMemoryInfo& info, size_t limit) {
   int parent_pid = getpid();
   int parent_tid = gettid();
@@ -352,9 +401,8 @@
 
   ALOGI("unreachable memory detection done");
   ALOGE("%zu bytes in %zu allocation%s unreachable out of %zu bytes in %zu allocation%s",
-      info.leak_bytes, info.num_leaks, info.num_leaks == 1 ? "" : "s",
-      info.allocation_bytes, info.num_allocations, info.num_allocations == 1 ? "" : "s");
-
+      info.leak_bytes, info.num_leaks, plural(info.num_leaks),
+      info.allocation_bytes, info.num_allocations, plural(info.num_allocations));
   return true;
 }
 
@@ -365,12 +413,24 @@
   oss << "  " << std::dec << size;
   oss << " bytes unreachable at ";
   oss << std::hex << begin;
-  if (referenced_count > 0) {
-    oss << " referencing " << std::dec << referenced_size << " unreachable bytes";
-    oss << " in " << referenced_count;
-    oss << " allocation" << ((referenced_count == 1) ? "" : "s");
-  }
   oss << std::endl;
+  if (referenced_count > 0) {
+    oss << std::dec;
+    oss << "   referencing " << referenced_size << " unreachable bytes";
+    oss << " in " << referenced_count << " allocation" << plural(referenced_count);
+    oss << std::endl;
+  }
+  if (similar_count > 0) {
+    oss << std::dec;
+    oss << "   and " << similar_size << " similar unreachable bytes";
+    oss << " in " << similar_count << " allocation" << plural(similar_count);
+    oss << std::endl;
+    if (similar_referenced_count > 0) {
+      oss << "   referencing " << similar_referenced_size << " unreachable bytes";
+      oss << " in " << similar_referenced_count << " allocation" << plural(similar_referenced_count);
+      oss << std::endl;
+    }
+  }
 
   if (log_contents) {
     const int bytes_per_line = 16;
@@ -379,7 +439,7 @@
     if (bytes == size) {
       oss << "   contents:" << std::endl;
     } else {
-      oss << "  first " << bytes << " bytes of contents:" << std::endl;
+      oss << "   first " << bytes << " bytes of contents:" << std::endl;
     }
 
     for (size_t i = 0; i < bytes; i += bytes_per_line) {
@@ -403,8 +463,8 @@
       oss << std::endl;
     }
   }
-  if (num_backtrace_frames > 0) {
-    oss << backtrace_string(backtrace_frames, num_backtrace_frames);
+  if (backtrace.num_frames > 0) {
+    oss << backtrace_string(backtrace.frames, backtrace.num_frames);
   }
 
   return oss.str();
@@ -413,11 +473,12 @@
 std::string UnreachableMemoryInfo::ToString(bool log_contents) const {
   std::ostringstream oss;
   oss << "  " << leak_bytes << " bytes in ";
-  oss << num_leaks << " unreachable allocation" << (num_leaks == 1 ? "" : "s");
+  oss << num_leaks << " unreachable allocation" << plural(num_leaks);
   oss << std::endl;
 
   for (auto it = leaks.begin(); it != leaks.end(); it++) {
       oss << it->ToString(log_contents);
+      oss << std::endl;
   }
 
   return oss.str();
diff --git a/libmemunreachable/include/memunreachable/memunreachable.h b/libmemunreachable/include/memunreachable/memunreachable.h
index 60d1b91..9b227fd 100644
--- a/libmemunreachable/include/memunreachable/memunreachable.h
+++ b/libmemunreachable/include/memunreachable/memunreachable.h
@@ -31,13 +31,24 @@
   size_t referenced_count;
   size_t referenced_size;
 
-  size_t num_backtrace_frames;
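+  // Stats for other leaks with an identical backtrace that were combined into this one.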
+  size_t similar_count;
+  size_t similar_size;
+  size_t similar_referenced_count;
+  size_t similar_referenced_size;
+
+  size_t total_size;
 
   static const size_t contents_length = 32;
   char contents[contents_length];
 
-  static const size_t backtrace_length = 16;
-  uintptr_t backtrace_frames[backtrace_length];
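+  // Allocation backtrace; leaks with identical frames are combined into one report.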
+  struct Backtrace {
+    size_t num_frames;
+
+    static const size_t max_frames = 16;
+    uintptr_t frames[max_frames];
+  } backtrace;
 
   std::string ToString(bool log_contents) const;
 };
diff --git a/libmemunreachable/tests/LeakFolding_test.cpp b/libmemunreachable/tests/LeakFolding_test.cpp
index c5aa1ed..879a3a0 100644
--- a/libmemunreachable/tests/LeakFolding_test.cpp
+++ b/libmemunreachable/tests/LeakFolding_test.cpp
@@ -56,7 +56,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(1U, num_leaks);
   EXPECT_EQ(sizeof(uintptr_t), leaked_bytes);
@@ -81,7 +81,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(2U, num_leaks);
   EXPECT_EQ(2*sizeof(uintptr_t), leaked_bytes);
@@ -110,7 +110,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(2U, num_leaks);
   EXPECT_EQ(2*sizeof(uintptr_t), leaked_bytes);
@@ -141,7 +141,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(3U, num_leaks);
   EXPECT_EQ(3*sizeof(uintptr_t), leaked_bytes);
@@ -172,7 +172,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(3U, num_leaks);
   EXPECT_EQ(5*sizeof(uintptr_t), leaked_bytes);
@@ -215,7 +215,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(6U, num_leaks);
   EXPECT_EQ(6*sizeof(uintptr_t), leaked_bytes);
@@ -251,7 +251,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(4U, num_leaks);
   EXPECT_EQ(4*sizeof(uintptr_t), leaked_bytes);
@@ -289,11 +289,11 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(n, num_leaks);
   EXPECT_EQ(n * sizeof(uintptr_t), leaked_bytes);
-  ASSERT_EQ(100U, leaked.size());
+  ASSERT_EQ(1000U, leaked.size());
   EXPECT_EQ(n - 1, leaked[0].referenced_count);
   EXPECT_EQ((n - 1) * sizeof(uintptr_t), leaked[0].referenced_size);
 }
@@ -326,7 +326,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(n + 1, num_leaks);
   EXPECT_EQ((n + 1) * sizeof(uintptr_t), leaked_bytes);
@@ -368,7 +368,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(4U, num_leaks);
   EXPECT_EQ(5 * sizeof(uintptr_t), leaked_bytes);
@@ -411,7 +411,7 @@
   allocator::vector<LeakFolding::Leak> leaked(heap_);
   size_t num_leaks = 0;
   size_t leaked_bytes = 0;
-  ASSERT_EQ(true, folding.Leaked(leaked, 100, &num_leaks, &leaked_bytes));
+  ASSERT_EQ(true, folding.Leaked(leaked, &num_leaks, &leaked_bytes));
 
   EXPECT_EQ(4U, num_leaks);
   EXPECT_EQ(8 * sizeof(uintptr_t), leaked_bytes);