[scudo] Adjust page map buffer size am: 146abcd216

Original change: https://android-review.googlesource.com/c/platform/external/scudo/+/2478535

Change-Id: I8f8db1b2d5aea82270436f289c6193aaeef94e48
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
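
The change replaces RegionPageMap's single 2048-uptr static buffer,
which was guarded by a global HybridMutex held for as long as the
buffer was in use, with a BufferPool of StaticBufferCount (2) static
buffers sized by StaticBufferSize (512). Free slots are tracked in a
bit mask: a set bit marks a free buffer, acquisition is a
find-first-set plus an XOR under a short-lived lock, and any request
larger than StaticBufferSize falls back to map().

A minimal standalone sketch of that bit-mask technique (illustrative
only, not the scudo code; SlotCount, the helper names, and the use of
the GCC/Clang __builtin_ctzll builtin are this sketch's own):

    #include <cassert>
    #include <cstdint>

    constexpr unsigned SlotCount = 4; // scudo: StaticBufferCount

    // All slots start free. Bits at and above SlotCount stay set forever,
    // so the count-trailing-zeros below never sees a zero mask; the same
    // trick as the "preserve 1 bit" static_assert in the patch.
    uint64_t Mask = ~uint64_t(0);

    // Returns a free slot index, or SlotCount when the pool is exhausted.
    unsigned takeSlot() {
      unsigned Index = static_cast<unsigned>(__builtin_ctzll(Mask));
      if (Index < SlotCount)
        Mask ^= uint64_t(1) << Index; // clear the bit: slot now in use
      return Index;
    }

    void releaseSlot(unsigned Index) {
      assert(Index < SlotCount && !(Mask & (uint64_t(1) << Index)));
      Mask |= uint64_t(1) << Index; // set the bit: slot free again
    }

    int main() {
      unsigned A = takeSlot(), B = takeSlot();
      assert(A == 0 && B == 1);
      releaseSlot(A);
      assert(takeSlot() == 0); // a freed slot is reused first
      return 0;
    }
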
diff --git a/standalone/release.cpp b/standalone/release.cpp
index 3f40dbe..938bb41 100644
--- a/standalone/release.cpp
+++ b/standalone/release.cpp
@@ -10,7 +10,7 @@
 
 namespace scudo {
 
-HybridMutex RegionPageMap::Mutex = {};
-uptr RegionPageMap::StaticBuffer[RegionPageMap::StaticBufferCount];
+BufferPool<RegionPageMap::StaticBufferCount, RegionPageMap::StaticBufferSize>
+    RegionPageMap::Buffers;
 
 } // namespace scudo
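
Note: the release.cpp hunk above is the mechanical half of the change;
a class-static member still needs exactly one out-of-line definition,
so the old Mutex/StaticBuffer definitions are swapped for the pool. A
sketch of that C++ pattern (hypothetical names, not scudo code):

    // In the header: declaration inside the class.
    struct Holder {
      template <int N> struct Pool { int Slots[N]; };
      static Pool<4> Buffers;
    };

    // In exactly one .cpp file: the definition, with the template
    // arguments spelled out, as release.cpp now does for
    // RegionPageMap::Buffers.
    Holder::Pool<4> Holder::Buffers;
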
diff --git a/standalone/release.h b/standalone/release.h
index b0151a4..c3b0a77 100644
--- a/standalone/release.h
+++ b/standalone/release.h
@@ -49,6 +49,99 @@
   MapPlatformData *Data = nullptr;
 };
 
+// A buffer pool which holds a fixed number of static buffers for fast buffer
+// allocation. If the request size is greater than `StaticBufferSize`, it'll
+// delegate the allocation to map().
+template <uptr StaticBufferCount, uptr StaticBufferSize> class BufferPool {
+public:
+  // Preserve 1 bit in the `Mask` so that we don't need a zero check when
+  // extracting the least significant set bit from the `Mask`.
+  static_assert(StaticBufferCount < SCUDO_WORDSIZE, "");
+  static_assert(isAligned(StaticBufferSize, SCUDO_CACHE_LINE_SIZE), "");
+
+  // Return a buffer which is at least `BufferSize`.
+  uptr *getBuffer(const uptr BufferSize) {
+    if (UNLIKELY(BufferSize > StaticBufferSize))
+      return getDynamicBuffer(BufferSize);
+
+    uptr Index;
+    {
+      // TODO: In general, we expect this operation to be fast, so a waiting
+      // thread won't be put to sleep. The HybridMutex does implement
+      // busy-waiting, but we may want to review the performance and see if
+      // we need an explicit spin lock here.
+      ScopedLock L(Mutex);
+      Index = getLeastSignificantSetBitIndex(Mask);
+      if (Index < StaticBufferCount)
+        Mask ^= static_cast<uptr>(1) << Index;
+    }
+
+    if (Index >= StaticBufferCount)
+      return getDynamicBuffer(BufferSize);
+
+    const uptr Offset = Index * StaticBufferSize;
+    memset(&RawBuffer[Offset], 0, StaticBufferSize);
+    return &RawBuffer[Offset];
+  }
+
+  void releaseBuffer(uptr *Buffer, const uptr BufferSize) {
+    const uptr Index = getStaticBufferIndex(Buffer, BufferSize);
+    if (Index < StaticBufferCount) {
+      ScopedLock L(Mutex);
+      DCHECK_EQ((Mask & (static_cast<uptr>(1) << Index)), 0U);
+      Mask |= static_cast<uptr>(1) << Index;
+    } else {
+      unmap(reinterpret_cast<void *>(Buffer),
+            roundUp(BufferSize, getPageSizeCached()));
+    }
+  }
+
+  bool isStaticBufferTestOnly(uptr *Buffer, uptr BufferSize) {
+    return getStaticBufferIndex(Buffer, BufferSize) < StaticBufferCount;
+  }
+
+private:
+  uptr getStaticBufferIndex(uptr *Buffer, uptr BufferSize) {
+    if (UNLIKELY(BufferSize > StaticBufferSize))
+      return StaticBufferCount;
+
+    const uptr BufferBase = reinterpret_cast<uptr>(Buffer);
+    const uptr RawBufferBase = reinterpret_cast<uptr>(RawBuffer);
+
+    if (BufferBase < RawBufferBase ||
+        BufferBase >= RawBufferBase + sizeof(RawBuffer)) {
+      return StaticBufferCount;
+    }
+
+    DCHECK_LE(BufferSize, StaticBufferSize);
+    DCHECK_LE(BufferBase + BufferSize, RawBufferBase + sizeof(RawBuffer));
+    DCHECK_EQ((BufferBase - RawBufferBase) % StaticBufferSize, 0U);
+
+    const uptr Index =
+        (BufferBase - RawBufferBase) / (StaticBufferSize * sizeof(uptr));
+    DCHECK_LT(Index, StaticBufferCount);
+    return Index;
+  }
+
+  uptr *getDynamicBuffer(const uptr BufferSize) {
+    // When using a heap-based buffer, precommit the pages backing the
+    // Vmar by passing the |MAP_PRECOMMIT| flag. This allows an optimization
+    // where page-fault exceptions are skipped as the allocated memory
+    // is accessed. So far, this is only enabled on Fuchsia; it hasn't
+    // proven a performance benefit on other platforms.
+    const uptr MmapFlags = MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0);
+    return reinterpret_cast<uptr *>(
+        map(nullptr, roundUp(BufferSize, getPageSizeCached()), "scudo:counters",
+            MmapFlags, &MapData));
+  }
+
+  HybridMutex Mutex;
+  // A set bit means the buffer at that index is free; a clear bit, in use.
+  uptr Mask GUARDED_BY(Mutex) = ~static_cast<uptr>(0);
+  uptr RawBuffer[StaticBufferCount * StaticBufferSize] GUARDED_BY(Mutex);
+  [[no_unique_address]] MapPlatformData MapData = {};
+};
+
 // A Region page map is used to record the usage of pages in the regions. It
 // implements a packed array of Counters. Each counter occupies 2^N bits, enough
 // to store counter's MaxValue. Ctor will try to use a static buffer first, and
@@ -76,11 +169,7 @@
   ~RegionPageMap() {
     if (!isAllocated())
       return;
-    if (Buffer == &StaticBuffer[0])
-      Mutex.unlock();
-    else
-      unmap(reinterpret_cast<void *>(Buffer),
-            roundUp(BufferSize, getPageSizeCached()));
+    Buffers.releaseBuffer(Buffer, BufferSize);
     Buffer = nullptr;
   }
 
@@ -88,8 +177,7 @@
   // specify the thread-safety attribute properly in current code structure.
   // Besides, it's the only place we may want to check thread safety. Therefore,
   // it's fine to bypass the thread-safety analysis now.
-  void reset(uptr NumberOfRegion, uptr CountersPerRegion,
-             uptr MaxValue) NO_THREAD_SAFETY_ANALYSIS {
+  void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) {
     DCHECK_GT(NumberOfRegion, 0);
     DCHECK_GT(CountersPerRegion, 0);
     DCHECK_GT(MaxValue, 0);
@@ -115,21 +203,8 @@
         roundUp(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >>
         PackingRatioLog;
     BufferSize = SizePerRegion * sizeof(*Buffer) * Regions;
-    if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) &&
-        Mutex.tryLock()) {
-      Buffer = &StaticBuffer[0];
-      memset(Buffer, 0, BufferSize);
-    } else {
-      // When using a heap-based buffer, precommit the pages backing the
-      // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization
-      // where page fault exceptions are skipped as the allocated memory
-      // is accessed.
-      const uptr MmapFlags =
-          MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0);
-      Buffer = reinterpret_cast<uptr *>(
-          map(nullptr, roundUp(BufferSize, getPageSizeCached()),
-              "scudo:counters", MmapFlags, &MapData));
-    }
+    Buffer = Buffers.getBuffer(BufferSize);
+    DCHECK_NE(Buffer, nullptr);
   }
 
   bool isAllocated() const { return !!Buffer; }
@@ -206,8 +281,6 @@
 
   uptr getBufferSize() const { return BufferSize; }
 
-  static const uptr StaticBufferCount = 2048U;
-
 private:
   uptr Regions;
   uptr NumCounters;
@@ -219,10 +292,12 @@
   uptr SizePerRegion;
   uptr BufferSize;
   uptr *Buffer;
-  [[no_unique_address]] MapPlatformData MapData = {};
 
-  static HybridMutex Mutex;
-  static uptr StaticBuffer[StaticBufferCount] GUARDED_BY(Mutex);
+  // We may consider making this configurable if there are cases that would
+  // benefit from it.
+  static const uptr StaticBufferCount = 2U;
+  static const uptr StaticBufferSize = 512U;
+  static BufferPool<StaticBufferCount, StaticBufferSize> Buffers;
 };
 
 template <class ReleaseRecorderT> class FreePagesRangeTracker {
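
Note on getStaticBufferIndex above: it decides whether a pointer came
from the static pool by range-checking it against RawBuffer, then
recovers the slot index by dividing the byte offset by the per-slot
stride of StaticBufferSize uptr elements. A standalone sketch of that
computation (illustrative only; the names are this sketch's own):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    constexpr size_t SlotCount = 2;   // scudo: StaticBufferCount
    constexpr size_t SlotElems = 512; // scudo: StaticBufferSize

    uint64_t Raw[SlotCount * SlotElems]; // scudo: RawBuffer

    // Returns the slot index for a pool pointer, or SlotCount for any
    // pointer outside the pool (the caller then unmaps it instead).
    size_t slotIndexOf(const uint64_t *P) {
      const uintptr_t Base = reinterpret_cast<uintptr_t>(Raw);
      const uintptr_t Addr = reinterpret_cast<uintptr_t>(P);
      if (Addr < Base || Addr >= Base + sizeof(Raw))
        return SlotCount;
      return (Addr - Base) / (SlotElems * sizeof(uint64_t));
    }

    int main() {
      assert(slotIndexOf(&Raw[0]) == 0);
      assert(slotIndexOf(&Raw[SlotElems]) == 1);
      static uint64_t Outside[4];
      assert(slotIndexOf(Outside) == SlotCount);
      return 0;
    }
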
diff --git a/standalone/tests/release_test.cpp b/standalone/tests/release_test.cpp
index e549d0d..173928e 100644
--- a/standalone/tests/release_test.cpp
+++ b/standalone/tests/release_test.cpp
@@ -562,3 +562,24 @@
   testReleasePartialRegion<scudo::FuchsiaSizeClassMap>();
   testReleasePartialRegion<scudo::SvelteSizeClassMap>();
 }
+
+TEST(ScudoReleaseTest, BufferPool) {
+  constexpr scudo::uptr StaticBufferCount = SCUDO_WORDSIZE - 1;
+  constexpr scudo::uptr StaticBufferSize = 512U;
+  scudo::BufferPool<StaticBufferCount, StaticBufferSize> Pool;
+
+  std::vector<std::pair<scudo::uptr *, scudo::uptr>> Buffers;
+  for (scudo::uptr I = 0; I < StaticBufferCount; ++I) {
+    scudo::uptr *P = Pool.getBuffer(StaticBufferSize);
+    EXPECT_TRUE(Pool.isStaticBufferTestOnly(P, StaticBufferSize));
+    Buffers.emplace_back(P, StaticBufferSize);
+  }
+
+  // The static buffers are supposed to be used up.
+  scudo::uptr *P = Pool.getBuffer(StaticBufferSize);
+  EXPECT_FALSE(Pool.isStaticBufferTestOnly(P, StaticBufferSize));
+
+  Pool.releaseBuffer(P, StaticBufferSize);
+  for (auto &Buffer : Buffers)
+    Pool.releaseBuffer(Buffer.first, Buffer.second);
+}
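
A natural companion case, not part of this change, would exercise the
dynamic fallback directly with the same BufferPool API (the test name
below is hypothetical):

    TEST(ScudoReleaseTest, BufferPoolDynamicFallback) {
      constexpr scudo::uptr StaticBufferCount = SCUDO_WORDSIZE - 1;
      constexpr scudo::uptr StaticBufferSize = 512U;
      scudo::BufferPool<StaticBufferCount, StaticBufferSize> Pool;

      // Any request above StaticBufferSize must bypass the static slots
      // and be served by map() instead.
      scudo::uptr *P = Pool.getBuffer(StaticBufferSize + 1);
      EXPECT_NE(P, nullptr);
      EXPECT_FALSE(Pool.isStaticBufferTestOnly(P, StaticBufferSize + 1));
      Pool.releaseBuffer(P, StaticBufferSize + 1);
    }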