[scudo] Use bytes-in-freelist as a hint of page release

Tracking the pushed bytes between to releaseToOSMaybe calls may lead to
a overestimated case that if we do malloc 2KB -> free 2KB -> malloc 2KB
-> free 2KB, we may think we have released 4KB but it only releases 2KB
actually. Switch to use bytes-in-freelist excludes more cases that can't
release the pages

Reviewed By: cferris

Differential Revision: https://reviews.llvm.org/D146400

GitOrigin-RevId: e74834f9bb6bb608e00e2cd230cb3b63df0803b9
Change-Id: I4f381886a28bb5f467c859fab4f3818d79b6b6ac
diff --git a/standalone/local_cache.h b/standalone/local_cache.h
index 321d4f5..92869ea 100644
--- a/standalone/local_cache.h
+++ b/standalone/local_cache.h
@@ -69,10 +69,9 @@
     // Number of blocks pushed into this group. This is an increment-only
     // counter.
     uptr PushedBlocks;
-    // This is used to track how many blocks are pushed since last time we
-    // checked `PushedBlocks`. It's useful for page releasing to determine the
-    // usage of a BatchGroup.
-    uptr PushedBlocksAtLastCheckpoint;
+    // This is used to track how many bytes are not in-use since last time we
+    // tried to release pages.
+    uptr BytesInBGAtLastCheckpoint;
     // Blocks are managed by TransferBatch in a list.
     SinglyLinkedList<TransferBatch> Batches;
   };
diff --git a/standalone/primary32.h b/standalone/primary32.h
index 3be689f..7ac8df9 100644
--- a/standalone/primary32.h
+++ b/standalone/primary32.h
@@ -317,7 +317,7 @@
   };
 
   struct ReleaseToOsInfo {
-    uptr PushedBlocksAtLastRelease;
+    uptr BytesInFreeListAtLastCheckpoint;
     uptr RangesReleased;
     uptr LastReleasedBytes;
     u64 LastReleaseAtNs;
@@ -489,7 +489,7 @@
       // TODO(chiahungduan): Avoid the use of push_back() in `Batches`.
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
-      BG->PushedBlocksAtLastCheckpoint = 0;
+      BG->BytesInBGAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -736,13 +736,34 @@
     const uptr BytesInFreeList =
         Sci->AllocatedUser -
         (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize;
-    if (BytesInFreeList < PageSize)
-      return 0; // No chance to release anything.
-    const uptr BytesPushed =
-        (Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) *
-        BlockSize;
-    if (BytesPushed < PageSize)
-      return 0; // Nothing new to release.
+
+    bool MaySkip = false;
+
+    if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
+      Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
+      MaySkip = true;
+    }
+
+    // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
+    // so that we won't underestimate the releasable pages. For example, the
+    // following is the region usage,
+    //
+    //  BytesInFreeListAtLastCheckpoint   AllocatedUser
+    //                v                         v
+    //  |--------------------------------------->
+    //         ^                   ^
+    //  BytesInFreeList     ReleaseThreshold
+    //
+    // In general, if we have collected enough bytes and the amount of free
+    // bytes meets the ReleaseThreshold, we will try to do page release. If we
+    // don't update `BytesInFreeListAtLastCheckpoint` when the current
+    // `BytesInFreeList` is smaller, we may take longer time to wait for enough
+    // freed blocks because we miss the bytes between
+    // (BytesInFreeListAtLastCheckpoint - BytesInFreeList).
+    const uptr PushedBytesDelta =
+        BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
+    if (PushedBytesDelta < PageSize)
+      MaySkip = true;
 
     const bool CheckDensity =
         BlockSize < PageSize / 16U && ReleaseType != ReleaseToOS::ForceAll;
@@ -751,10 +772,14 @@
     // amount of batches pushed to the freelist before attempting to release.
     if (CheckDensity) {
       if (ReleaseType == ReleaseToOS::Normal &&
-          BytesPushed < Sci->AllocatedUser / 16U)
-        return 0;
+          PushedBytesDelta < Sci->AllocatedUser / 16U) {
+        MaySkip = true;
+      }
     }
 
+    if (MaySkip && ReleaseType != ReleaseToOS::ForceAll)
+      return 0;
+
     if (ReleaseType == ReleaseToOS::Normal) {
       const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
       if (IntervalMs < 0)
@@ -785,11 +810,6 @@
       return reinterpret_cast<uptr>(CompactPtr);
     };
     for (BatchGroup &BG : Sci->FreeList) {
-      const uptr PushedBytesDelta =
-          BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint;
-      if (PushedBytesDelta * BlockSize < PageSize)
-        continue;
-
       const uptr GroupBase = decompactGroupBase(BG.CompactPtrGroupBase);
       // The `GroupSize` may not be divided by `BlockSize`, which means there is
       // an unused space at the end of Region. Exclude that space to avoid
@@ -805,6 +825,16 @@
       const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
                              BG.Batches.front()->getCount();
       const uptr BytesInBG = NumBlocks * BlockSize;
+
+      if (ReleaseType != ReleaseToOS::ForceAll &&
+          BytesInBG <= BG.BytesInBGAtLastCheckpoint) {
+        BG.BytesInBGAtLastCheckpoint = BytesInBG;
+        continue;
+      }
+      const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint;
+      if (PushedBytesDelta < PageSize)
+        continue;
+
       // Given the randomness property, we try to release the pages only if the
       // bytes used by free blocks exceed certain proportion of allocated
       // spaces.
@@ -813,7 +843,9 @@
         continue;
       }
 
-      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
+      // TODO: Consider updating this after page release if `ReleaseRecorder`
+      // can tell the releasd bytes in each group.
+      BG.BytesInBGAtLastCheckpoint = BytesInBG;
 
       const uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
       const uptr RegionIndex = (GroupBase - Base) / RegionSize;
@@ -848,7 +880,7 @@
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
     if (Recorder.getReleasedRangesCount() > 0) {
-      Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
+      Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
       Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
       Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
       TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes;
diff --git a/standalone/primary64.h b/standalone/primary64.h
index bca5ab8..00b7758 100644
--- a/standalone/primary64.h
+++ b/standalone/primary64.h
@@ -430,7 +430,7 @@
   };
 
   struct ReleaseToOsInfo {
-    uptr PushedBlocksAtLastRelease;
+    uptr BytesInFreeListAtLastCheckpoint;
     uptr RangesReleased;
     uptr LastReleasedBytes;
     u64 LastReleaseAtNs;
@@ -586,7 +586,7 @@
       // TODO(chiahungduan): Avoid the use of push_back() in `Batches`.
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
-      BG->PushedBlocksAtLastCheckpoint = 0;
+      BG->BytesInBGAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -828,26 +828,50 @@
     const uptr BytesInFreeList =
         Region->AllocatedUser -
         (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
-    if (BytesInFreeList < PageSize)
-      return 0; // No chance to release anything.
-    const uptr BytesPushed = (Region->Stats.PushedBlocks -
-                              Region->ReleaseInfo.PushedBlocksAtLastRelease) *
-                             BlockSize;
-    if (BytesPushed < PageSize)
-      return 0; // Nothing new to release.
 
-    const bool CheckDensity =
-        isSmallBlock(BlockSize) && ReleaseType != ReleaseToOS::ForceAll;
+    bool MaySkip = false;
+
+    // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
+    // so that we won't underestimate the releasable pages. For example, the
+    // following is the region usage,
+    //
+    //  BytesInFreeListAtLastCheckpoint   AllocatedUser
+    //                v                         v
+    //  |--------------------------------------->
+    //         ^                   ^
+    //  BytesInFreeList     ReleaseThreshold
+    //
+    // In general, if we have collected enough bytes and the amount of free
+    // bytes meets the ReleaseThreshold, we will try to do page release. If we
+    // don't update `BytesInFreeListAtLastCheckpoint` when the current
+    // `BytesInFreeList` is smaller, we may take longer time to wait for enough
+    // freed blocks because we miss the bytes between
+    // (BytesInFreeListAtLastCheckpoint - BytesInFreeList).
+    if (BytesInFreeList <=
+        Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
+      Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
+      MaySkip = true;
+    }
+
+    const uptr RegionPushedBytesDelta =
+        BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
+    if (RegionPushedBytesDelta < PageSize)
+      MaySkip = true;
+
+    const bool CheckDensity = isSmallBlock(BlockSize);
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
     if (CheckDensity) {
       if (ReleaseType == ReleaseToOS::Normal &&
-          BytesPushed < Region->TryReleaseThreshold) {
-        return 0;
+          RegionPushedBytesDelta < Region->TryReleaseThreshold) {
+        MaySkip = true;
       }
     }
 
+    if (MaySkip && ReleaseType != ReleaseToOS::ForceAll)
+      return 0;
+
     if (ReleaseType == ReleaseToOS::Normal) {
       const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
       if (IntervalMs < 0)
@@ -889,14 +913,6 @@
 
     for (BatchGroup *BG = Region->FreeList.front(), *Prev = nullptr;
          BG != nullptr;) {
-      const uptr PushedBytesDelta =
-          (BG->PushedBlocks - BG->PushedBlocksAtLastCheckpoint) * BlockSize;
-      if (PushedBytesDelta < PageSize) {
-        Prev = BG;
-        BG = BG->Next;
-        continue;
-      }
-
       // Group boundary is always GroupSize-aligned from CompactPtr base. The
       // layout of memory groups is like,
       //
@@ -930,6 +946,17 @@
       const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch +
                              BG->Batches.front()->getCount();
       const uptr BytesInBG = NumBlocks * BlockSize;
+
+      if (ReleaseType != ReleaseToOS::ForceAll &&
+          BytesInBG <= BG->BytesInBGAtLastCheckpoint) {
+        BG->BytesInBGAtLastCheckpoint = BytesInBG;
+        Prev = BG;
+        BG = BG->Next;
+        continue;
+      }
+
+      const uptr PushedBytesDelta = BG->BytesInBGAtLastCheckpoint - BytesInBG;
+
       // Given the randomness property, we try to release the pages only if the
       // bytes used by free blocks exceed certain proportion of group size. Note
       // that this heuristic only applies when all the spaces in a BatchGroup
@@ -1013,6 +1040,13 @@
       BatchGroup *Cur = BG;
       BG = BG->Next;
 
+      // Ideally, we may want to update this only after successful release.
+      // However, for smaller blocks, each block marking is a costly operation.
+      // Therefore, we update it earlier.
+      // TODO: Consider updating this after page release if `ReleaseRecorder`
+      // can tell the releasd bytes in each group.
+      Cur->BytesInBGAtLastCheckpoint = BytesInBG;
+
       if (Prev != nullptr)
         Region->FreeList.extract(Prev, Cur);
       else
@@ -1053,8 +1087,6 @@
                                ReleaseRangeSize, ReleaseOffset);
 
     for (BatchGroup &BG : GroupToRelease) {
-      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
-
       const uptr BatchGroupBase =
           decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase);
       const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
@@ -1099,8 +1131,7 @@
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
     if (Recorder.getReleasedRangesCount() > 0) {
-      Region->ReleaseInfo.PushedBlocksAtLastRelease =
-          Region->Stats.PushedBlocks;
+      Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
       Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
       Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
     }