Imported Scudo Standalone changes:

  - 681773f2919ddf8af0e2ff3474a443df6a15a5e2 scudo: Instead of exporting a pointer to the allocator, e... by Peter Collingbourne <peter@pcc.me.uk>
  - 041547eb4eb0fcb2155af7537aaed7f601ea6343 scudo: Table driven size classes for Android allocator. by Peter Collingbourne <peter@pcc.me.uk>
  - 9068766b9a523350060f0817c7f50df09de3a69c scudo: Add a dump of primary allocation sizes to malloc_i... by Peter Collingbourne <peter@pcc.me.uk>
  - e79c3b4c2d22256d8e66b6a908268e83bc8c4634 scudo: Fix Android build. by Peter Collingbourne <peter@pcc.me.uk>

GitOrigin-RevId: e79c3b4c2d22256d8e66b6a908268e83bc8c4634
Change-Id: I16f96df7cc8993d7b210157ce4dea7cf59642b4d
diff --git a/standalone/internal_defs.h b/standalone/internal_defs.h
index 8f6a89e..c61f8e6 100644
--- a/standalone/internal_defs.h
+++ b/standalone/internal_defs.h
@@ -29,6 +29,7 @@
 // Attributes & builtins related macros.
 
 #define INTERFACE __attribute__((visibility("default")))
+#define HIDDEN __attribute__((visibility("hidden")))
 #define WEAK __attribute__((weak))
 #define ALWAYS_INLINE inline __attribute__((always_inline))
 #define ALIAS(X) __attribute__((alias(X)))
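
The HIDDEN attribute added above is what makes it cheap to export the allocator object itself rather than a pointer to it (the wrappers_c.cpp and wrappers_cpp.cpp changes below): a hidden symbol stays out of the dynamic symbol table, so it cannot be interposed and, under -fPIC, can be addressed PC-relative instead of through the GOT. A minimal standalone sketch of the pattern, with hypothetical file and symbol names:

    // allocator.cpp: define the object with hidden visibility.
    #define HIDDEN __attribute__((visibility("hidden")))
    HIDDEN int Allocator;

    // wrappers.cpp: reference it directly across translation units. Because
    // the symbol is hidden, the compiler may emit a direct PC-relative access
    // instead of loading an address through the GOT.
    extern HIDDEN int Allocator;
    int touch() { return Allocator; }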
diff --git a/standalone/size_class_map.h b/standalone/size_class_map.h
index ff587c9..46f53ae 100644
--- a/standalone/size_class_map.h
+++ b/standalone/size_class_map.h
@@ -9,11 +9,32 @@
 #ifndef SCUDO_SIZE_CLASS_MAP_H_
 #define SCUDO_SIZE_CLASS_MAP_H_
 
+#include "chunk.h"
 #include "common.h"
 #include "string_utils.h"
 
 namespace scudo {
 
+inline uptr scaledLog2(uptr Size, uptr ZeroLog, uptr LogBits) {
+  const uptr L = getMostSignificantSetBitIndex(Size);
+  const uptr LBits = (Size >> (L - LogBits)) - (1 << LogBits);
+  const uptr HBits = (L - ZeroLog) << LogBits;
+  return LBits + HBits;
+}
+
+template <typename Config> struct SizeClassMapBase {
+  static u32 getMaxCachedHint(uptr Size) {
+    DCHECK_NE(Size, 0);
+    u32 N;
+    // Force a 32-bit division if the template parameters allow for it.
+    if (Config::MaxBytesCachedLog > 31 || Config::MaxSizeLog > 31)
+      N = static_cast<u32>((1UL << Config::MaxBytesCachedLog) / Size);
+    else
+      N = (1U << Config::MaxBytesCachedLog) / static_cast<u32>(Size);
+    return Max(1U, Min(Config::MaxNumCachedHint, N));
+  }
+};
+
 // SizeClassMap maps allocation sizes into size classes and back, in an
 // efficient table-free manner.
 //
@@ -33,22 +54,24 @@
 // of chunks that can be cached per-thread:
 // - MaxNumCachedHint is a hint for the max number of chunks cached per class.
 // - 2^MaxBytesCachedLog is the max number of bytes cached per class.
+template <typename Config>
+class FixedSizeClassMap : public SizeClassMapBase<Config> {
+  typedef SizeClassMapBase<Config> Base;
 
-template <u8 NumBits, u8 MinSizeLog, u8 MidSizeLog, u8 MaxSizeLog,
-          u32 MaxNumCachedHintT, u8 MaxBytesCachedLog>
-class SizeClassMap {
-  static const uptr MinSize = 1UL << MinSizeLog;
-  static const uptr MidSize = 1UL << MidSizeLog;
+  static const uptr MinSize = 1UL << Config::MinSizeLog;
+  static const uptr MidSize = 1UL << Config::MidSizeLog;
   static const uptr MidClass = MidSize / MinSize;
-  static const u8 S = NumBits - 1;
+  static const u8 S = Config::NumBits - 1;
   static const uptr M = (1UL << S) - 1;
 
-public:
-  static const u32 MaxNumCachedHint = MaxNumCachedHintT;
+  static const uptr SizeDelta = Chunk::getHeaderSize();
 
-  static const uptr MaxSize = 1UL << MaxSizeLog;
+public:
+  static const u32 MaxNumCachedHint = Config::MaxNumCachedHint;
+
+  static const uptr MaxSize = (1UL << Config::MaxSizeLog) + SizeDelta;
   static const uptr NumClasses =
-      MidClass + ((MaxSizeLog - MidSizeLog) << S) + 1;
+      MidClass + ((Config::MaxSizeLog - Config::MidSizeLog) << S) + 1;
   static_assert(NumClasses <= 256, "");
   static const uptr LargestClassId = NumClasses - 1;
   static const uptr BatchClassId = 0;
@@ -56,97 +79,206 @@
   static uptr getSizeByClassId(uptr ClassId) {
     DCHECK_NE(ClassId, BatchClassId);
     if (ClassId <= MidClass)
-      return ClassId << MinSizeLog;
+      return (ClassId << Config::MinSizeLog) + SizeDelta;
     ClassId -= MidClass;
     const uptr T = MidSize << (ClassId >> S);
-    return T + (T >> S) * (ClassId & M);
+    return T + (T >> S) * (ClassId & M) + SizeDelta;
   }
 
   static uptr getClassIdBySize(uptr Size) {
+    if (Size <= SizeDelta + (1 << Config::MinSizeLog))
+      return 1;
+    Size -= SizeDelta;
     DCHECK_LE(Size, MaxSize);
     if (Size <= MidSize)
-      return (Size + MinSize - 1) >> MinSizeLog;
-    Size -= 1;
-    const uptr L = getMostSignificantSetBitIndex(Size);
-    const uptr LBits = (Size >> (L - S)) - (1 << S);
-    const uptr HBits = (L - MidSizeLog) << S;
-    return MidClass + 1 + HBits + LBits;
-  }
-
-  static u32 getMaxCachedHint(uptr Size) {
-    DCHECK_LE(Size, MaxSize);
-    DCHECK_NE(Size, 0);
-    u32 N;
-    // Force a 32-bit division if the template parameters allow for it.
-    if (MaxBytesCachedLog > 31 || MaxSizeLog > 31)
-      N = static_cast<u32>((1UL << MaxBytesCachedLog) / Size);
-    else
-      N = (1U << MaxBytesCachedLog) / static_cast<u32>(Size);
-    return Max(1U, Min(MaxNumCachedHint, N));
-  }
-
-  static void print() {
-    ScopedString Buffer(1024);
-    uptr PrevS = 0;
-    uptr TotalCached = 0;
-    for (uptr I = 0; I < NumClasses; I++) {
-      if (I == BatchClassId)
-        continue;
-      const uptr S = getSizeByClassId(I);
-      if (S >= MidSize / 2 && (S & (S - 1)) == 0)
-        Buffer.append("\n");
-      const uptr D = S - PrevS;
-      const uptr P = PrevS ? (D * 100 / PrevS) : 0;
-      const uptr L = S ? getMostSignificantSetBitIndex(S) : 0;
-      const uptr Cached = getMaxCachedHint(S) * S;
-      Buffer.append(
-          "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n",
-          I, getSizeByClassId(I), D, P, L, getMaxCachedHint(S), Cached,
-          getClassIdBySize(S));
-      TotalCached += Cached;
-      PrevS = S;
-    }
-    Buffer.append("Total Cached: %zu\n", TotalCached);
-    Buffer.output();
-  }
-
-  static void validate() {
-    for (uptr C = 0; C < NumClasses; C++) {
-      if (C == BatchClassId)
-        continue;
-      const uptr S = getSizeByClassId(C);
-      CHECK_NE(S, 0U);
-      CHECK_EQ(getClassIdBySize(S), C);
-      if (C < LargestClassId)
-        CHECK_EQ(getClassIdBySize(S + 1), C + 1);
-      CHECK_EQ(getClassIdBySize(S - 1), C);
-      if (C - 1 != BatchClassId)
-        CHECK_GT(getSizeByClassId(C), getSizeByClassId(C - 1));
-    }
-    // Do not perform the loop if the maximum size is too large.
-    if (MaxSizeLog > 19)
-      return;
-    for (uptr S = 1; S <= MaxSize; S++) {
-      const uptr C = getClassIdBySize(S);
-      CHECK_LT(C, NumClasses);
-      CHECK_GE(getSizeByClassId(C), S);
-      if (C - 1 != BatchClassId)
-        CHECK_LT(getSizeByClassId(C - 1), S);
-    }
+      return (Size + MinSize - 1) >> Config::MinSizeLog;
+    return MidClass + 1 + scaledLog2(Size - 1, Config::MidSizeLog, S);
   }
 };
 
-typedef SizeClassMap<3, 5, 8, 17, 8, 10> DefaultSizeClassMap;
+template <typename Config>
+class TableSizeClassMap : public SizeClassMapBase<Config> {
+  static const u8 S = Config::NumBits - 1;
+  static const uptr M = (1UL << S) - 1;
+  static const uptr ClassesSize =
+      sizeof(Config::Classes) / sizeof(Config::Classes[0]);
 
-// TODO(kostyak): further tune class maps for Android & Fuchsia.
+  struct SizeTable {
+    constexpr SizeTable() {
+      uptr Pos = 1 << Config::MidSizeLog;
+      uptr Inc = 1 << (Config::MidSizeLog - S);
+      for (uptr i = 0; i != getTableSize(); ++i) {
+        Pos += Inc;
+        if ((Pos & (Pos - 1)) == 0)
+          Inc *= 2;
+        Tab[i] = computeClassId(Pos + Config::SizeDelta);
+      }
+    }
+
+    constexpr static u8 computeClassId(uptr Size) {
+      for (uptr i = 0; i != ClassesSize; ++i) {
+        if (Size <= Config::Classes[i])
+          return i + 1;
+      }
+      return -1;
+    }
+
+    constexpr static uptr getTableSize() {
+      return (Config::MaxSizeLog - Config::MidSizeLog) << S;
+    }
+
+    u8 Tab[getTableSize()] = {};
+  };
+
+  static constexpr SizeTable Table = {};
+
+public:
+  static const u32 MaxNumCachedHint = Config::MaxNumCachedHint;
+
+  static const uptr NumClasses = ClassesSize + 1;
+  static_assert(NumClasses < 256, "");
+  static const uptr LargestClassId = NumClasses - 1;
+  static const uptr BatchClassId = 0;
+  static const uptr MaxSize = Config::Classes[LargestClassId - 1];
+
+  static uptr getSizeByClassId(uptr ClassId) {
+    return Config::Classes[ClassId - 1];
+  }
+
+  static uptr getClassIdBySize(uptr Size) {
+    if (Size <= Config::Classes[0])
+      return 1;
+    Size -= Config::SizeDelta;
+    DCHECK_LE(Size, MaxSize);
+    if (Size <= (1 << Config::MidSizeLog))
+      return ((Size - 1) >> Config::MinSizeLog) + 1;
+    return Table.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)];
+  }
+
+  static void print() {}
+  static void validate() {}
+};
+
+struct AndroidSizeClassConfig {
 #if SCUDO_WORDSIZE == 64U
-typedef SizeClassMap<4, 4, 8, 14, 4, 10> SvelteSizeClassMap;
-typedef SizeClassMap<2, 5, 9, 16, 14, 14> AndroidSizeClassMap;
-#else
-typedef SizeClassMap<4, 3, 7, 14, 5, 10> SvelteSizeClassMap;
-typedef SizeClassMap<3, 4, 9, 16, 14, 14> AndroidSizeClassMap;
-#endif
+  // Measured using a system_server profile.
+  static const uptr NumBits = 7;
+  static const uptr MinSizeLog = 4;
+  static const uptr MidSizeLog = 6;
+  static const uptr MaxSizeLog = 16;
+  static const u32 MaxNumCachedHint = 14;
+  static const uptr MaxBytesCachedLog = 14;
 
+  static constexpr u32 Classes[] = {
+      0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000a0,
+      0x000b0, 0x000e0, 0x00110, 0x00130, 0x001a0, 0x00240, 0x00320, 0x00430,
+      0x00640, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01150, 0x01ad0, 0x02190,
+      0x03610, 0x04010, 0x04510, 0x04d10, 0x05a10, 0x07310, 0x09610, 0x10010,
+  };
+  static const uptr SizeDelta = 16;
+#else
+  // Measured using a dex2oat profile.
+  static const uptr NumBits = 8;
+  static const uptr MinSizeLog = 4;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 16;
+  static const u32 MaxNumCachedHint = 14;
+  static const uptr MaxBytesCachedLog = 14;
+
+  static constexpr u32 Classes[] = {
+      0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00080, 0x00090,
+      0x000a0, 0x000b0, 0x000c0, 0x000d0, 0x000e0, 0x000f0, 0x00100, 0x00110,
+      0x00120, 0x00140, 0x00150, 0x00170, 0x00190, 0x001c0, 0x001f0, 0x00220,
+      0x00240, 0x00260, 0x002a0, 0x002e0, 0x00310, 0x00340, 0x00380, 0x003b0,
+      0x003e0, 0x00430, 0x00490, 0x00500, 0x00570, 0x005f0, 0x00680, 0x00720,
+      0x007d0, 0x00890, 0x00970, 0x00a50, 0x00b80, 0x00cb0, 0x00e30, 0x00fb0,
+      0x011b0, 0x01310, 0x01470, 0x01790, 0x01b50, 0x01fd0, 0x02310, 0x02690,
+      0x02b10, 0x02fd0, 0x03610, 0x03e10, 0x04890, 0x05710, 0x06a90, 0x10010,
+  };
+  static const uptr SizeDelta = 16;
+#endif
+};
+
+typedef TableSizeClassMap<AndroidSizeClassConfig> AndroidSizeClassMap;
+
+struct DefaultSizeClassConfig {
+  static const uptr NumBits = 3;
+  static const uptr MinSizeLog = 5;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 17;
+  static const u32 MaxNumCachedHint = 8;
+  static const uptr MaxBytesCachedLog = 10;
+};
+
+typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap;
+
+struct SvelteSizeClassConfig {
+#if SCUDO_WORDSIZE == 64U
+  static const uptr NumBits = 4;
+  static const uptr MinSizeLog = 4;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 14;
+  static const u32 MaxNumCachedHint = 4;
+  static const uptr MaxBytesCachedLog = 10;
+#else
+  static const uptr NumBits = 4;
+  static const uptr MinSizeLog = 3;
+  static const uptr MidSizeLog = 7;
+  static const uptr MaxSizeLog = 14;
+  static const u32 MaxNumCachedHint = 5;
+  static const uptr MaxBytesCachedLog = 10;
+#endif
+};
+
+typedef FixedSizeClassMap<SvelteSizeClassConfig> SvelteSizeClassMap;
+
+template <typename SCMap> inline void printMap() {
+  ScopedString Buffer(1024);
+  uptr PrevS = 0;
+  uptr TotalCached = 0;
+  for (uptr I = 0; I < SCMap::NumClasses; I++) {
+    if (I == SCMap::BatchClassId)
+      continue;
+    const uptr S = SCMap::getSizeByClassId(I);
+    const uptr D = S - PrevS;
+    const uptr P = PrevS ? (D * 100 / PrevS) : 0;
+    const uptr L = S ? getMostSignificantSetBitIndex(S) : 0;
+    const uptr Cached = SCMap::getMaxCachedHint(S) * S;
+    Buffer.append(
+        "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n",
+        I, S, D, P, L, SCMap::getMaxCachedHint(S), Cached,
+        SCMap::getClassIdBySize(S));
+    TotalCached += Cached;
+    PrevS = S;
+  }
+  Buffer.append("Total Cached: %zu\n", TotalCached);
+  Buffer.output();
+}
+
+template <typename SCMap> static void validateMap() {
+  for (uptr C = 0; C < SCMap::NumClasses; C++) {
+    if (C == SCMap::BatchClassId)
+      continue;
+    const uptr S = SCMap::getSizeByClassId(C);
+    CHECK_NE(S, 0U);
+    CHECK_EQ(SCMap::getClassIdBySize(S), C);
+    if (C < SCMap::LargestClassId)
+      CHECK_EQ(SCMap::getClassIdBySize(S + 1), C + 1);
+    CHECK_EQ(SCMap::getClassIdBySize(S - 1), C);
+    if (C - 1 != SCMap::BatchClassId)
+      CHECK_GT(SCMap::getSizeByClassId(C), SCMap::getSizeByClassId(C - 1));
+  }
+  // Do not perform the loop if the maximum size is too large.
+  if (SCMap::MaxSize > (1 << 19))
+    return;
+  for (uptr S = 1; S <= SCMap::MaxSize; S++) {
+    const uptr C = SCMap::getClassIdBySize(S);
+    CHECK_LT(C, SCMap::NumClasses);
+    CHECK_GE(SCMap::getSizeByClassId(C), S);
+    if (C - 1 != SCMap::BatchClassId)
+      CHECK_LT(SCMap::getSizeByClassId(C - 1), S);
+  }
+}
 } // namespace scudo
 
 #endif // SCUDO_SIZE_CLASS_MAP_H_
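
Both map flavors index their upper range with scaledLog2(), which decomposes a size into a power-of-two bucket (HBits) and 2^LogBits linear steps within that bucket (LBits). A standalone sketch of the helper with a small driver; the parameters mirror DefaultSizeClassConfig (MidSizeLog = 8, S = NumBits - 1 = 2), and getMostSignificantSetBitIndex is re-implemented here only for illustration:

    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    using uptr = uintptr_t; // Assumes a 64-bit target for brevity.

    static uptr getMostSignificantSetBitIndex(uptr X) {
      return 63U - static_cast<uptr>(__builtin_clzll(X));
    }

    // Same body as the patch: HBits counts the powers of two above ZeroLog,
    // LBits the 2^LogBits linear subdivisions within the current power.
    static uptr scaledLog2(uptr Size, uptr ZeroLog, uptr LogBits) {
      const uptr L = getMostSignificantSetBitIndex(Size);
      const uptr LBits = (Size >> (L - LogBits)) - (1 << LogBits);
      const uptr HBits = (L - ZeroLog) << LogBits;
      return LBits + HBits;
    }

    int main() {
      // Past 2^8 = 256, each power-of-two interval is cut into 4 classes:
      // 320, 384, 448, 512, then 640, 768, 896, 1024, and so on.
      for (uptr Size : {320, 384, 448, 512, 640, 1024})
        printf("sizes up to %4zu -> step %zu\n", static_cast<size_t>(Size),
               static_cast<size_t>(scaledLog2(Size - 1, 8, 2)));
      return 0;
    }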
diff --git a/standalone/tests/combined_test.cpp b/standalone/tests/combined_test.cpp
index f6dc3d6..488dca9 100644
--- a/standalone/tests/combined_test.cpp
+++ b/standalone/tests/combined_test.cpp
@@ -157,15 +157,16 @@
 
   // Check that reallocating a chunk to a slightly smaller or larger size
   // returns the same chunk. This requires that all the sizes we iterate on use
-  // the same block size, but that should be the case for 2048 with our default
-  // class size maps.
-  P = Allocator->allocate(DataSize, Origin);
-  memset(P, Marker, DataSize);
+  // the same block size, but that should be the case for MaxSize - 64 with our
+  // default class size maps.
+  constexpr scudo::uptr ReallocSize = MaxSize - 64;
+  P = Allocator->allocate(ReallocSize, Origin);
+  memset(P, Marker, ReallocSize);
   for (scudo::sptr Delta = -32; Delta < 32; Delta += 8) {
-    const scudo::uptr NewSize = DataSize + Delta;
+    const scudo::uptr NewSize = ReallocSize + Delta;
     void *NewP = Allocator->reallocate(P, NewSize);
     EXPECT_EQ(NewP, P);
-    for (scudo::uptr I = 0; I < DataSize - 32; I++)
+    for (scudo::uptr I = 0; I < ReallocSize - 32; I++)
       EXPECT_EQ((reinterpret_cast<char *>(NewP))[I], Marker);
     checkMemoryTaggingMaybe(Allocator.get(), NewP, NewSize, 0);
   }
@@ -343,9 +344,18 @@
 #endif
 }
 
+struct DeathSizeClassConfig {
+  static const scudo::uptr NumBits = 1;
+  static const scudo::uptr MinSizeLog = 10;
+  static const scudo::uptr MidSizeLog = 10;
+  static const scudo::uptr MaxSizeLog = 10;
+  static const scudo::u32 MaxNumCachedHint = 1;
+  static const scudo::uptr MaxBytesCachedLog = 10;
+};
+
 struct DeathConfig {
   // Tiny allocator, its Primary only serves chunks of 1024 bytes.
-  using DeathSizeClassMap = scudo::SizeClassMap<1U, 10U, 10U, 10U, 1U, 10U>;
+  using DeathSizeClassMap = scudo::FixedSizeClassMap<DeathSizeClassConfig>;
   typedef scudo::SizeClassAllocator64<DeathSizeClassMap, 20U> Primary;
   typedef scudo::MapAllocator<scudo::MapAllocatorNoCache> Secondary;
   template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U>;
diff --git a/standalone/tests/size_class_map_test.cpp b/standalone/tests/size_class_map_test.cpp
index 5585040..c9e173f 100644
--- a/standalone/tests/size_class_map_test.cpp
+++ b/standalone/tests/size_class_map_test.cpp
@@ -12,8 +12,8 @@
 
 template <class SizeClassMap> void testSizeClassMap() {
   typedef SizeClassMap SCMap;
-  SCMap::print();
-  SCMap::validate();
+  scudo::printMap<SCMap>();
+  scudo::validateMap<SCMap>();
 }
 
 TEST(ScudoSizeClassMapTest, DefaultSizeClassMap) {
@@ -28,12 +28,30 @@
   testSizeClassMap<scudo::AndroidSizeClassMap>();
 }
 
+struct OneClassSizeClassConfig {
+  static const scudo::uptr NumBits = 1;
+  static const scudo::uptr MinSizeLog = 5;
+  static const scudo::uptr MidSizeLog = 5;
+  static const scudo::uptr MaxSizeLog = 5;
+  static const scudo::u32 MaxNumCachedHint = 0;
+  static const scudo::uptr MaxBytesCachedLog = 0;
+};
+
 TEST(ScudoSizeClassMapTest, OneClassSizeClassMap) {
-  testSizeClassMap<scudo::SizeClassMap<1, 5, 5, 5, 0, 0>>();
+  testSizeClassMap<scudo::FixedSizeClassMap<OneClassSizeClassConfig>>();
 }
 
 #if SCUDO_CAN_USE_PRIMARY64
+struct LargeMaxSizeClassConfig {
+  static const scudo::uptr NumBits = 3;
+  static const scudo::uptr MinSizeLog = 4;
+  static const scudo::uptr MidSizeLog = 8;
+  static const scudo::uptr MaxSizeLog = 63;
+  static const scudo::u32 MaxNumCachedHint = 128;
+  static const scudo::uptr MaxBytesCachedLog = 16;
+};
+
 TEST(ScudoSizeClassMapTest, LargeMaxSizeClassMap) {
-  testSizeClassMap<scudo::SizeClassMap<3, 4, 8, 63, 128, 16>>();
+  testSizeClassMap<scudo::FixedSizeClassMap<LargeMaxSizeClassConfig>>();
 }
 #endif
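
AndroidSizeClassMap, exercised above, is the table-driven variant: instead of evaluating the class-id formula on every lookup, TableSizeClassMap bakes the answers into a constexpr table at compile time (the real table is indexed by scaled-log2 step rather than raw size). A reduced model of that idea, with a made-up class list:

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t Classes[] = {32, 48, 64, 96, 128, 192, 256};
    constexpr size_t NumClasses = sizeof(Classes) / sizeof(Classes[0]);

    // Smallest class that fits Size; ids are 1-based like the real map,
    // where id 0 is reserved for the batch class.
    constexpr uint8_t classIdFor(uint32_t Size) {
      for (size_t I = 0; I != NumClasses; ++I)
        if (Size <= Classes[I])
          return static_cast<uint8_t>(I + 1);
      return 0; // Out of range.
    }

    struct Table {
      uint8_t Tab[256] = {};
      constexpr Table() { // C++14 constexpr: loops are allowed.
        for (uint32_t Size = 1; Size <= 256; ++Size)
          Tab[Size - 1] = classIdFor(Size);
      }
    };

    // Built entirely at compile time; a lookup is then a single load.
    constexpr Table T;

    int main() {
      const uint32_t Size = 100;
      printf("size %u -> class %u (block size %u)\n", Size,
             static_cast<unsigned>(T.Tab[Size - 1]),
             Classes[T.Tab[Size - 1] - 1]);
      return 0;
    }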
diff --git a/standalone/tests/wrappers_c_test.cpp b/standalone/tests/wrappers_c_test.cpp
index 976ac4f..d4ba7d7 100644
--- a/standalone/tests/wrappers_c_test.cpp
+++ b/standalone/tests/wrappers_c_test.cpp
@@ -303,7 +303,11 @@
 #if !SCUDO_FUCHSIA
 
 TEST(ScudoWrappersCTest, MallocInfo) {
-  char Buffer[64];
+  // Use volatile so that the allocations don't get optimized away.
+  void *volatile P1 = malloc(1234);
+  void *volatile P2 = malloc(4321);
+
+  char Buffer[16384];
   FILE *F = fmemopen(Buffer, sizeof(Buffer), "w+");
   EXPECT_NE(F, nullptr);
   errno = 0;
@@ -311,6 +315,11 @@
   EXPECT_EQ(errno, 0);
   fclose(F);
   EXPECT_EQ(strncmp(Buffer, "<malloc version=\"scudo-", 23), 0);
+  EXPECT_NE(nullptr, strstr(Buffer, "<alloc size=\"1234\" count=\""));
+  EXPECT_NE(nullptr, strstr(Buffer, "<alloc size=\"4321\" count=\""));
+
+  free(P1);
+  free(P2);
 }
 
 TEST(ScudoWrappersCTest, Fork) {
diff --git a/standalone/tools/compute_size_class_config.cpp b/standalone/tools/compute_size_class_config.cpp
new file mode 100644
index 0000000..82f37b6
--- /dev/null
+++ b/standalone/tools/compute_size_class_config.cpp
@@ -0,0 +1,162 @@
+//===-- compute_size_class_config.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <vector>
+
+struct Alloc {
+  size_t size, count;
+};
+
+size_t measureWastage(const std::vector<Alloc> &allocs,
+                      const std::vector<size_t> &classes, size_t pageSize,
+                      size_t headerSize) {
+  size_t totalWastage = 0;
+  for (auto &a : allocs) {
+    size_t sizePlusHeader = a.size + headerSize;
+    size_t wastage = -1ull;
+    for (auto c : classes)
+      if (c >= sizePlusHeader && c - sizePlusHeader < wastage)
+        wastage = c - sizePlusHeader;
+    if (wastage == -1ull)
+      continue;
+    if (wastage > 2 * pageSize)
+      wastage = 2 * pageSize;
+    totalWastage += wastage * a.count;
+  }
+  return totalWastage;
+}
+
+void readAllocs(std::vector<Alloc> &allocs, const char *path) {
+  FILE *f = fopen(path, "r");
+  if (!f) {
+    fprintf(stderr, "compute_size_class_config: could not open %s: %s\n", path,
+            strerror(errno));
+    exit(1);
+  }
+
+  const char header[] = "<malloc version=\"scudo-1\">\n";
+  char buf[sizeof(header) - 1];
+  if (fread(buf, 1, sizeof(header) - 1, f) != sizeof(header) - 1 ||
+      memcmp(buf, header, sizeof(header) - 1) != 0) {
+    fprintf(stderr, "compute_size_class_config: invalid input format\n");
+    exit(1);
+  }
+
+  Alloc a;
+  while (fscanf(f, "<alloc size=\"%zu\" count=\"%zu\"/>\n", &a.size,
+                &a.count) == 2)
+    allocs.push_back(a);
+  fclose(f);
+}
+
+size_t log2Floor(size_t x) { return sizeof(long) * 8 - 1 - __builtin_clzl(x); }
+
+void usage() {
+  fprintf(stderr,
+          "usage: compute_size_class_config [-p pageSize] [-c largestClass] "
+          "[-h headerSize] [-n numClasses] [-b numBits] profile...\n");
+  exit(1);
+}
+
+int main(int argc, char **argv) {
+  size_t pageSize = 4096;
+  size_t largestClass = 65552;
+  size_t headerSize = 16;
+  size_t numClasses = 32;
+  size_t numBits = 5;
+
+  std::vector<Alloc> allocs;
+  for (int i = 1; i != argc;) {
+    auto matchArg = [&](size_t &arg, const char *name) {
+      if (strcmp(argv[i], name) == 0) {
+        if (i + 1 != argc) {
+          arg = atoi(argv[i + 1]);
+          i += 2;
+        } else {
+          usage();
+        }
+        return true;
+      }
+      return false;
+    };
+    if (matchArg(pageSize, "-p") || matchArg(largestClass, "-c") ||
+        matchArg(headerSize, "-h") || matchArg(numClasses, "-n") ||
+        matchArg(numBits, "-b"))
+      continue;
+    readAllocs(allocs, argv[i]);
+    ++i;
+  }
+
+  if (allocs.empty())
+    usage();
+
+  std::vector<size_t> classes;
+  classes.push_back(largestClass);
+  for (size_t i = 1; i != numClasses; ++i) {
+    size_t minWastage = -1ull;
+    size_t minWastageClass = 0;
+    for (size_t newClass = 16; newClass != largestClass; newClass += 16) {
+      // Skip classes at or below the header size (ctzl/clzl are undefined on
+      // zero) and classes with more than numBits significant bits.
+      if (newClass <= headerSize ||
+          __builtin_ctzl(newClass - headerSize) +
+                  __builtin_clzl(newClass - headerSize) <
+              sizeof(long) * 8 - numBits)
+        continue;
+
+      classes.push_back(newClass);
+      size_t newWastage = measureWastage(allocs, classes, pageSize, headerSize);
+      classes.pop_back();
+      if (newWastage < minWastage) {
+        minWastage = newWastage;
+        minWastageClass = newClass;
+      }
+    }
+    classes.push_back(minWastageClass);
+  }
+
+  std::sort(classes.begin(), classes.end());
+  size_t minSizeLog = log2Floor(headerSize);
+  size_t midSizeIndex = 0;
+  while (classes[midSizeIndex + 1] - classes[midSizeIndex] == (1 << minSizeLog))
+    midSizeIndex++;
+  size_t midSizeLog = log2Floor(classes[midSizeIndex] - headerSize);
+  size_t maxSizeLog = log2Floor(classes.back() - headerSize - 1) + 1;
+
+  printf(R"(// wastage = %zu
+
+struct MySizeClassConfig {
+  static const uptr NumBits = %zu;
+  static const uptr MinSizeLog = %zu;
+  static const uptr MidSizeLog = %zu;
+  static const uptr MaxSizeLog = %zu;
+  static const u32 MaxNumCachedHint = 14;
+  static const uptr MaxBytesCachedLog = 14;
+
+  static constexpr u32 Classes[] = {)",
+         measureWastage(allocs, classes, pageSize, headerSize), numBits,
+         minSizeLog, midSizeLog, maxSizeLog);
+  for (size_t i = 0; i != classes.size(); ++i) {
+    if ((i % 8) == 0)
+      printf("\n      ");
+    else
+      printf(" ");
+    printf("0x%05zx,", classes[i]);
+  }
+  printf(R"(
+  };
+  static const uptr SizeDelta = %zu;
+};
+)", headerSize);
+}
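
The struct the tool prints has exactly the shape consumed by TableSizeClassMap above. A sketch of the intended round trip (profile name and flag values are illustrative, not prescribed by the tool):

    // 1. Capture a profile: run the workload against Scudo and save the
    //    malloc_info() output (extended by the wrappers_c.inc change below).
    // 2. Derive a class table from it:
    //      compute_size_class_config -n 32 -b 7 profile.xml
    // 3. Paste the printed MySizeClassConfig into the allocator config and
    //    instantiate the map from it:
    typedef scudo::TableSizeClassMap<MySizeClassConfig> MySizeClassMap;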
diff --git a/standalone/wrappers_c.cpp b/standalone/wrappers_c.cpp
index 93a666c..098cc08 100644
--- a/standalone/wrappers_c.cpp
+++ b/standalone/wrappers_c.cpp
@@ -22,13 +22,11 @@
 #define SCUDO_ALLOCATOR Allocator
 
 extern "C" void SCUDO_PREFIX(malloc_postinit)();
-static scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)>
-    SCUDO_ALLOCATOR;
-// Pointer to the static allocator so that the C++ wrappers can access it.
+
+// Export the static allocator so that the C++ wrappers can access it.
 // Technically we could have a completely separated heap for C & C++ but in
 // reality the amount of cross pollination between the two is staggering.
-scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> *
-    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
+scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR;
 
 #include "wrappers_c.inc"
 
diff --git a/standalone/wrappers_c.inc b/standalone/wrappers_c.inc
index 555f17d..91f615d 100644
--- a/standalone/wrappers_c.inc
+++ b/standalone/wrappers_c.inc
@@ -180,8 +180,23 @@
 }
 
 INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) {
-  fputs("<malloc version=\"scudo-1\">", stream);
-  fputs("</malloc>", stream);
+  const scudo::uptr max_size =
+      decltype(SCUDO_ALLOCATOR)::PrimaryT::SizeClassMap::MaxSize;
+  auto *sizes = static_cast<scudo::uptr *>(
+      SCUDO_PREFIX(calloc)(max_size, sizeof(scudo::uptr)));
+  auto callback = [](uintptr_t, size_t size, void *arg) {
+    auto *sizes = reinterpret_cast<scudo::uptr *>(arg);
+    if (size < max_size)
+      sizes[size]++;
+  };
+  SCUDO_ALLOCATOR.iterateOverChunks(0, -1ul, callback, sizes);
+
+  fputs("<malloc version=\"scudo-1\">\n", stream);
+  for (scudo::uptr i = 0; i != max_size; ++i)
+    if (sizes[i])
+      fprintf(stream, "<alloc size=\"%zu\" count=\"%zu\"/>\n", i, sizes[i]);
+  fputs("</malloc>\n", stream);
+  SCUDO_PREFIX(free)(sizes);
   return 0;
 }
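
With this change, the stream receives one <alloc> element per allocation size observed below the primary's MaxSize, which is precisely the format readAllocs() in the new tool parses. Illustrative output (the counts are invented):

    <malloc version="scudo-1">
    <alloc size="1234" count="3"/>
    <alloc size="4321" count="1"/>
    </malloc>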
 
diff --git a/standalone/wrappers_c_bionic.cpp b/standalone/wrappers_c_bionic.cpp
index cab565b..7a012a2 100644
--- a/standalone/wrappers_c_bionic.cpp
+++ b/standalone/wrappers_c_bionic.cpp
@@ -25,11 +25,6 @@
 extern "C" void SCUDO_PREFIX(malloc_postinit)();
 static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)>
     SCUDO_ALLOCATOR;
-// Pointer to the static allocator so that the C++ wrappers can access it.
-// Technically we could have a completely separated heap for C & C++ but in
-// reality the amount of cross pollination between the two is staggering.
-scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> *
-    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
 
 #include "wrappers_c.inc"
 
@@ -44,11 +39,6 @@
 static scudo::Allocator<scudo::AndroidSvelteConfig,
                         SCUDO_PREFIX(malloc_postinit)>
     SCUDO_ALLOCATOR;
-// Pointer to the static allocator so that the C++ wrappers can access it.
-// Technically we could have a completely separated heap for C & C++ but in
-// reality the amount of cross pollination between the two is staggering.
-scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)> *
-    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
 
 #include "wrappers_c.inc"
 
diff --git a/standalone/wrappers_cpp.cpp b/standalone/wrappers_cpp.cpp
index 1da5385..adb1041 100644
--- a/standalone/wrappers_cpp.cpp
+++ b/standalone/wrappers_cpp.cpp
@@ -16,7 +16,7 @@
 #include <stdint.h>
 
 extern "C" void malloc_postinit();
-extern scudo::Allocator<scudo::Config, malloc_postinit> *AllocatorPtr;
+extern HIDDEN scudo::Allocator<scudo::Config, malloc_postinit> Allocator;
 
 namespace std {
 struct nothrow_t {};
@@ -24,85 +24,85 @@
 } // namespace std
 
 INTERFACE WEAK void *operator new(size_t size) {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New);
+  return Allocator.allocate(size, scudo::Chunk::Origin::New);
 }
 INTERFACE WEAK void *operator new[](size_t size) {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray);
+  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray);
 }
 INTERFACE WEAK void *operator new(size_t size,
                                   std::nothrow_t const &) NOEXCEPT {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New);
+  return Allocator.allocate(size, scudo::Chunk::Origin::New);
 }
 INTERFACE WEAK void *operator new[](size_t size,
                                     std::nothrow_t const &) NOEXCEPT {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray);
+  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray);
 }
 INTERFACE WEAK void *operator new(size_t size, std::align_val_t align) {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New,
-                                static_cast<scudo::uptr>(align));
+  return Allocator.allocate(size, scudo::Chunk::Origin::New,
+                            static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void *operator new[](size_t size, std::align_val_t align) {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray,
-                                static_cast<scudo::uptr>(align));
+  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray,
+                            static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void *operator new(size_t size, std::align_val_t align,
                                   std::nothrow_t const &) NOEXCEPT {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::New,
-                                static_cast<scudo::uptr>(align));
+  return Allocator.allocate(size, scudo::Chunk::Origin::New,
+                            static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void *operator new[](size_t size, std::align_val_t align,
                                     std::nothrow_t const &) NOEXCEPT {
-  return AllocatorPtr->allocate(size, scudo::Chunk::Origin::NewArray,
-                                static_cast<scudo::uptr>(align));
+  return Allocator.allocate(size, scudo::Chunk::Origin::NewArray,
+                            static_cast<scudo::uptr>(align));
 }
 
 INTERFACE WEAK void operator delete(void *ptr)NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New);
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::New);
 }
 INTERFACE WEAK void operator delete[](void *ptr) NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray);
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray);
 }
 INTERFACE WEAK void operator delete(void *ptr, std::nothrow_t const &)NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New);
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::New);
 }
 INTERFACE WEAK void operator delete[](void *ptr,
                                       std::nothrow_t const &) NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray);
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray);
 }
 INTERFACE WEAK void operator delete(void *ptr, size_t size)NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, size);
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size);
 }
 INTERFACE WEAK void operator delete[](void *ptr, size_t size) NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, size);
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, size);
 }
 INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align)NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, 0,
-                           static_cast<scudo::uptr>(align));
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0,
+                       static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete[](void *ptr,
                                       std::align_val_t align) NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, 0,
-                           static_cast<scudo::uptr>(align));
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, 0,
+                       static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete(void *ptr, std::align_val_t align,
                                     std::nothrow_t const &)NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, 0,
-                           static_cast<scudo::uptr>(align));
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::New, 0,
+                       static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete[](void *ptr, std::align_val_t align,
                                       std::nothrow_t const &) NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, 0,
-                           static_cast<scudo::uptr>(align));
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, 0,
+                       static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete(void *ptr, size_t size,
                                     std::align_val_t align)NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::New, size,
-                           static_cast<scudo::uptr>(align));
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::New, size,
+                       static_cast<scudo::uptr>(align));
 }
 INTERFACE WEAK void operator delete[](void *ptr, size_t size,
                                       std::align_val_t align) NOEXCEPT {
-  AllocatorPtr->deallocate(ptr, scudo::Chunk::Origin::NewArray, size,
-                           static_cast<scudo::uptr>(align));
+  Allocator.deallocate(ptr, scudo::Chunk::Origin::NewArray, size,
+                       static_cast<scudo::uptr>(align));
 }
 
 #endif // !SCUDO_ANDROID || !_BIONIC