diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -65,6 +65,13 @@
     uptr GroupId;
     // Cache value of TransferBatch::getMaxCached()
     u16 MaxCachedPerBatch;
+    // Number of blocks pushed into this group. This is an increment-only
+    // counter.
+    uptr PushedBlocks;
+    // This is used to track how many blocks are pushed since last time we check
+    // `PushedBlocks`. It's useful for page releasing to determine the usage of
+    // a BatchGroup.
+    uptr PushedBlocksAtLastCheckpoint;
     // Blocks are managed by TransferBatch in a list.
     SinglyLinkedList<TransferBatch> Batches;
   };
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -116,6 +116,9 @@
   uptr compactPtrGroup(CompactPtrT CompactPtr) {
     return CompactPtr >> GroupSizeLog;
   }
+  uptr batchGroupBase(uptr GroupId) {
+    return GroupId << GroupSizeLog;
+  }
 
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
@@ -421,6 +424,8 @@
 
       BG->GroupId = GroupId;
       BG->Batches.push_front(TB);
+      BG->PushedBlocks = 0;
+      BG->PushedBlocksAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -446,6 +451,8 @@
         CurBatch->appendFromArray(&Array[I], AppendSize);
         I += AppendSize;
       }
+
+      BG->PushedBlocks += Size;
     };
 
     BatchGroup *Cur = Sci->FreeList.front();
@@ -658,10 +665,6 @@
     if (BlockSize < PageSize / 16U) {
       if (!Force && BytesPushed < Sci->AllocatedUser / 16U)
         return 0;
-      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
-      if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
-          (100U - 1U - BlockSize / 16U))
-        return 0;
     }
 
     if (!Force) {
@@ -690,8 +693,31 @@
       return reinterpret_cast<uptr>(CompactPtr);
     };
     PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions);
-    for (BatchGroup &BG : Sci->FreeList)
+    const uptr GroupSize = (1U << GroupSizeLog);
+    for (BatchGroup &BG : Sci->FreeList) {
+      const uptr BytesPushedInBG =
+          BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint;
+      if (BytesPushedInBG * BlockSize < PageSize)
+        continue;
+
+      const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
+          BG.Batches.back()->getCount();
+      const uptr BytesInBG = NumBlocks * BlockSize;
+      const uptr BatchGroupEnd = batchGroupBase(BG.GroupId) + GroupSize;
+      // Given the randomness property, we try to release the pages only if the
+      // bytes used by free blocks exceed certain proportion of group size. Note
+      // that this heuristic only applies when all the spaces in a BatchGroup
+      // are allocated.
+      if (Sci->AllocatedUser >= BatchGroupEnd &&
+          (BytesInBG * 100U) / GroupSize < (100U - 1U - BlockSize / 16U)) {
+        continue;
+      }
+
+      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
+      // Note that we don't always visit blocks in each BatchGroup so that we
+      // may miss the chance of releasing certain pages that across BatchGroups.
       Context.markFreeBlocks(BG.Batches, DecompactPtr, Base);
+    }
 
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -370,6 +370,10 @@
   static uptr compactPtrGroup(CompactPtrT CompactPtr) {
     return CompactPtr >> (GroupSizeLog - CompactPtrScale);
   }
+  static uptr batchGroupBase(uptr Base, uptr GroupId) {
+    return (GroupId << GroupSizeLog) + Base;
+
+  }
 
   // Push the blocks to their batch group. The layout will be like,
   //
@@ -424,6 +428,8 @@
 
       BG->GroupId = GroupId;
       BG->Batches.push_front(TB);
+      BG->PushedBlocks = 0;
+      BG->PushedBlocksAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -450,6 +456,8 @@
         CurBatch->appendFromArray(&Array[I], AppendSize);
         I += AppendSize;
       }
+
+      BG->PushedBlocks += Size;
     };
 
     BatchGroup *Cur = Region->FreeList.front();
@@ -691,8 +699,32 @@
     };
     auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
     PageReleaseContext Context(BlockSize, RegionSize, /*NumberOfRegions=*/1U);
-    for (BatchGroup &BG : Region->FreeList)
+    const uptr GroupSize = (1U << GroupSizeLog);
+    for (BatchGroup &BG : Region->FreeList) {
+      const uptr BytesPushedInBG =
+          BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint;
+      if (BytesPushedInBG * BlockSize < PageSize)
+        continue;
+
+      const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
+          BG.Batches.back()->getCount();
+      const uptr BytesInBG = NumBlocks * BlockSize;
+      const uptr BatchGroupEnd =
+          batchGroupBase(BG.GroupId, CompactPtrBase) + GroupSize;
+      // Given the randomness property, we try to release the pages only if the
+      // bytes used by free blocks exceed certain proportion of group size. Note
+      // that this heuristic only applies when all the spaces in a BatchGroup
+      // are allocated.
+      if (Region->AllocatedUser >= BatchGroupEnd &&
+          (BytesInBG * 100U) / GroupSize < (100U - 1U - BlockSize / 16U)) {
+        continue;
+      }
+
+      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
+      // Note that we don't always visit blocks in each BatchGroup so that we
+      // may miss the chance of releasing certain pages that across BatchGroups.
       Context.markFreeBlocks(BG.Batches, DecompactPtr, Region->RegionBeg);
+    }
 
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -506,7 +506,7 @@
   static const scudo::uptr MinSizeLog = 10;
   static const scudo::uptr MidSizeLog = 10;
   static const scudo::uptr MaxSizeLog = 13;
-  static const scudo::u16 MaxNumCachedHint = 4;
+  static const scudo::u16 MaxNumCachedHint = 8;
   static const scudo::uptr MaxBytesCachedLog = 12;
   static const scudo::uptr SizeDelta = 0;
 };