diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h
--- a/compiler-rt/lib/scudo/standalone/allocator_config.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.h
@@ -32,7 +32,7 @@
   // 512KB regions
   typedef SizeClassAllocator32<SizeClassMap, 19U> Primary;
 #endif
-  typedef MapAllocator<> Secondary;
+  typedef MapAllocator<MapAllocatorCache<>> Secondary;
   template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive
 };
@@ -47,7 +47,7 @@
   // 512KB regions
   typedef SizeClassAllocator32<SizeClassMap, 19U> Primary;
 #endif
-  typedef MapAllocator<> Secondary;
+  typedef MapAllocator<MapAllocatorCache<>> Secondary;
   template <class A>
   using TSDRegistryT = TSDRegistrySharedT<A, 2U>; // Shared, max 2 TSDs.
 };
@@ -61,7 +61,7 @@
   // 64KB regions
   typedef SizeClassAllocator32<SizeClassMap, 16U> Primary;
 #endif
-  typedef MapAllocator<0U> Secondary;
+  typedef MapAllocator<MapAllocatorCache<4U, 1UL << 18>> Secondary;
   template <class A>
   using TSDRegistryT = TSDRegistrySharedT<A, 1U>; // Shared, only 1 TSD.
 };
@@ -70,7 +70,7 @@
 struct FuchsiaConfig {
   // 1GB Regions
   typedef SizeClassAllocator64<DefaultSizeClassMap, 30U> Primary;
-  typedef MapAllocator<0U> Secondary;
+  typedef MapAllocator<MapAllocatorNoCache> Secondary;
   template <class A>
   using TSDRegistryT = TSDRegistrySharedT<A, 8U>; // Shared, max 8 TSDs.
 };
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -141,8 +141,9 @@
         static_cast<u32>(getFlags()->quarantine_max_chunk_size);
 
     Stats.initLinkerInitialized();
-    Primary.initLinkerInitialized(getFlags()->release_to_os_interval_ms);
-    Secondary.initLinkerInitialized(&Stats);
+    const s32 ReleaseToOsIntervalMs = getFlags()->release_to_os_interval_ms;
+    Primary.initLinkerInitialized(ReleaseToOsIntervalMs);
+    Secondary.initLinkerInitialized(&Stats, ReleaseToOsIntervalMs);
 
     Quarantine.init(
         static_cast<uptr>(getFlags()->quarantine_size_kb << 10),
diff --git a/compiler-rt/lib/scudo/standalone/flags.inc b/compiler-rt/lib/scudo/standalone/flags.inc
--- a/compiler-rt/lib/scudo/standalone/flags.inc
+++ b/compiler-rt/lib/scudo/standalone/flags.inc
@@ -45,6 +45,6 @@
            "returning NULL in otherwise non-fatal error scenarios, eg: OOM, "
            "invalid allocation alignments, etc.")
 
-SCUDO_FLAG(int, release_to_os_interval_ms, 5000,
+SCUDO_FLAG(int, release_to_os_interval_ms, SCUDO_ANDROID ? 1000 : 5000,
           "Interval (in milliseconds) at which to attempt release of unused "
           "memory to the OS. Negative values disable the feature.")
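Note: with this change a platform config selects its Secondary caching behavior by type rather than by a free-list size. Below is a minimal sketch of a hypothetical custom config (`MyConfig` and its cache parameters are illustrative, not part of this patch), assuming the in-tree scudo types:

    namespace scudo {
    struct MyConfig {
      using SizeClassMap = DefaultSizeClassMap;
      // 1GB regions, as in DefaultConfig.
      typedef SizeClassAllocator64<SizeClassMap, 30U> Primary;
      // Cache up to 16 Secondary blocks of at most 1MB each (illustrative
      // numbers); MapAllocatorNoCache would opt out of caching entirely.
      typedef MapAllocator<MapAllocatorCache<16U, 1UL << 20>> Secondary;
      template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive
    };
    } // namespace scudo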
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -74,8 +74,7 @@
       Sci->RandState = getRandomU32(&Seed);
       // See comment in the 64-bit primary about releasing smaller size classes.
       Sci->CanRelease = (ReleaseToOsInterval >= 0) &&
-                        (I != SizeClassMap::BatchClassId) &&
-                        (getSizeByClassId(I) >= (PageSize / 32));
+                        (getSizeByClassId(I) >= (PageSize / 64));
     }
     ReleaseToOsIntervalMs = ReleaseToOsInterval;
   }
@@ -385,7 +384,7 @@
     if (IntervalMs < 0)
       return 0;
     if (Sci->ReleaseInfo.LastReleaseAtNs +
-            static_cast<uptr>(IntervalMs) * 1000000ULL >
+            static_cast<u64>(IntervalMs) * 1000000 >
         getMonotonicTime()) {
       return 0; // Memory was returned recently.
     }
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -87,8 +87,7 @@
       // limit is mostly arbitrary and based on empirical observations.
      // TODO(kostyak): make the lower limit a runtime option
       Region->CanRelease = (ReleaseToOsInterval >= 0) &&
-                           (I != SizeClassMap::BatchClassId) &&
-                           (getSizeByClassId(I) >= (PageSize / 32));
+                           (getSizeByClassId(I) >= (PageSize / 64));
       Region->RandState = getRandomU32(&Seed);
     }
     ReleaseToOsIntervalMs = ReleaseToOsInterval;
@@ -401,7 +400,7 @@
     if (IntervalMs < 0)
       return 0;
     if (Region->ReleaseInfo.LastReleaseAtNs +
-            static_cast<uptr>(IntervalMs) * 1000000ULL >
+            static_cast<u64>(IntervalMs) * 1000000 >
         getMonotonicTime()) {
       return 0; // Memory was returned recently.
     }
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -200,7 +200,13 @@
   if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
     // Each chunk affects one page only.
     for (const auto &It : FreeList) {
-      for (u32 I = 0; I < It.getCount(); I++) {
+      // If dealing with a TransferBatch, the first pointer of the batch will
+      // point to the batch itself, we do not want to mark this for release as
+      // the batch is in use, so skip the first entry.
+      const bool IsTransferBatch =
+          (It.getCount() != 0) &&
+          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
+      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I));
         if (P >= Base && P < End)
           Counters.inc((P - Base) >> PageSizeLog);
@@ -209,7 +215,11 @@
   } else {
     // In all other cases chunks might affect more than one page.
     for (const auto &It : FreeList) {
-      for (u32 I = 0; I < It.getCount(); I++) {
+      // See TransferBatch comment above.
+      const bool IsTransferBatch =
+          (It.getCount() != 0) &&
+          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
+      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I));
         if (P >= Base && P < End)
           Counters.incRange((P - Base) >> PageSizeLog,
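Note: dropping the `BatchClassId` exclusion in both primaries relies on the release.h change above: a TransferBatch sitting in a freelist stores its own address as its first pointer, so the counting loops now skip entry 0 instead of exempting the whole batch size class from release. A self-contained toy model of that detection idiom (toy types and sizes, not the actual scudo ones):

    #include <assert.h>
    #include <stdint.h>

    // Toy stand-in for scudo's TransferBatch: an array of free pointers. In
    // the batch size class, the batch's storage holds the batch itself.
    struct ToyBatch {
      static constexpr uint32_t MaxCount = 4;
      void *Pointers[MaxCount];
      uint32_t Count = 0;
      uint32_t getCount() const { return Count; }
      void *get(uint32_t I) const { return Pointers[I]; }
    };

    int main() {
      int Blocks[2];
      ToyBatch B;
      B.Pointers[B.Count++] = &B;         // first entry: the batch itself
      B.Pointers[B.Count++] = &Blocks[0]; // plus two actually-free blocks
      B.Pointers[B.Count++] = &Blocks[1];

      // The idiom from the patch: entry 0 aliasing the batch means the
      // batch object is live and must not be marked for release.
      const bool IsTransferBatch =
          (B.getCount() != 0) &&
          (reinterpret_cast<uintptr_t>(B.get(0)) ==
           reinterpret_cast<uintptr_t>(&B));
      assert(IsTransferBatch);
      for (uint32_t I = IsTransferBatch ? 1 : 0; I < B.getCount(); I++)
        assert(B.get(I) != static_cast<void *>(&B)); // only real free blocks
      return 0;
    }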
diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -48,20 +48,183 @@
 } // namespace LargeBlock
 
-template <uptr MaxFreeListSize = 32U> class MapAllocator {
+class MapAllocatorNoCache {
 public:
-  // Ensure the freelist is disabled on Fuchsia, since it doesn't support
-  // releasing Secondary blocks yet.
-  static_assert(!SCUDO_FUCHSIA || MaxFreeListSize == 0U, "");
+  void initLinkerInitialized(UNUSED s32 ReleaseToOsInterval) {}
+  void init(UNUSED s32 ReleaseToOsInterval) {}
+  bool retrieve(UNUSED uptr Size, UNUSED LargeBlock::Header **H) {
+    return false;
+  }
+  bool store(UNUSED LargeBlock::Header *H) { return false; }
+  static bool canCache(UNUSED uptr Size) { return false; }
+  void disable() {}
+  void enable() {}
+};
+
+template <uptr MaxEntriesCount = 32U, uptr MaxEntrySize = 1UL << 19>
+class MapAllocatorCache {
+public:
+  // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length
+  // arrays are an extension for some compilers.
+  // FIXME(kostyak): support (partially) the cache on Fuchsia.
+  static_assert(!SCUDO_FUCHSIA || MaxEntriesCount == 0U, "");
+
+  void initLinkerInitialized(s32 ReleaseToOsInterval) {
+    ReleaseToOsIntervalMs = ReleaseToOsInterval;
+  }
+  void init(s32 ReleaseToOsInterval) {
+    memset(this, 0, sizeof(*this));
+    initLinkerInitialized(ReleaseToOsInterval);
+  }
+
+  bool store(LargeBlock::Header *H) {
+    bool EntryCached = false;
+    bool EmptyCache = false;
+    const u64 Time = getMonotonicTime();
+    {
+      ScopedLock L(Mutex);
+      if (EntriesCount == MaxEntriesCount) {
+        if (IsFullEvents++ == 4U)
+          EmptyCache = true;
+      } else {
+        for (uptr I = 0; I < MaxEntriesCount; I++) {
+          if (Entries[I].Block)
+            continue;
+          if (I != 0)
+            Entries[I] = Entries[0];
+          Entries[0].Block = reinterpret_cast<uptr>(H);
+          Entries[0].BlockEnd = H->BlockEnd;
+          Entries[0].MapBase = H->MapBase;
+          Entries[0].MapSize = H->MapSize;
+          Entries[0].Time = Time;
+          EntriesCount++;
+          EntryCached = true;
+          break;
+        }
+      }
+    }
+    if (EmptyCache)
+      empty();
+    else if (ReleaseToOsIntervalMs >= 0)
+      releaseOlderThan(Time -
+                       static_cast<u64>(ReleaseToOsIntervalMs) * 1000000);
+    return EntryCached;
+  }
+
+  bool retrieve(uptr Size, LargeBlock::Header **H) {
+    ScopedLock L(Mutex);
+    if (EntriesCount == 0)
+      return false;
+    for (uptr I = 0; I < MaxEntriesCount; I++) {
+      if (!Entries[I].Block)
+        continue;
+      const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block;
+      if (Size > BlockSize)
+        continue;
+      if (Size < BlockSize - getPageSizeCached() * 4U)
+        continue;
+      *H = reinterpret_cast<LargeBlock::Header *>(Entries[I].Block);
+      Entries[I].Block = 0;
+      (*H)->BlockEnd = Entries[I].BlockEnd;
+      (*H)->MapBase = Entries[I].MapBase;
+      (*H)->MapSize = Entries[I].MapSize;
+      EntriesCount--;
+      return true;
+    }
+    return false;
+  }
+
+  static bool canCache(uptr Size) {
+    return MaxEntriesCount != 0U && Size <= MaxEntrySize;
+  }
+
+  void disable() { Mutex.lock(); }
+
+  void enable() { Mutex.unlock(); }
+
+private:
+  void empty() {
+    struct {
+      void *MapBase;
+      uptr MapSize;
+      MapPlatformData Data;
+    } MapInfo[MaxEntriesCount];
+    uptr N = 0;
+    {
+      ScopedLock L(Mutex);
+      for (uptr I = 0; I < MaxEntriesCount; I++) {
+        if (!Entries[I].Block)
+          continue;
+        MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase);
+        MapInfo[N].MapSize = Entries[I].MapSize;
+        MapInfo[N].Data = Entries[I].Data;
+        Entries[I].Block = 0;
+        N++;
+      }
+      EntriesCount = 0;
+      IsFullEvents = 0;
+    }
+    for (uptr I = 0; I < N; I++)
+      unmap(MapInfo[I].MapBase, MapInfo[I].MapSize, UNMAP_ALL,
+            &MapInfo[I].Data);
+  }
+
+  void releaseOlderThan(u64 Time) {
+    struct {
+      uptr Block;
+      uptr BlockSize;
+      MapPlatformData Data;
+    } BlockInfo[MaxEntriesCount];
+    uptr N = 0;
+    {
+      ScopedLock L(Mutex);
+      if (!EntriesCount)
+        return;
+      for (uptr I = 0; I < MaxEntriesCount; I++) {
+        if (!Entries[I].Block || !Entries[I].Time)
+          continue;
+        if (Entries[I].Time > Time)
+          continue;
+        BlockInfo[N].Block = Entries[I].Block;
+        BlockInfo[N].BlockSize = Entries[I].BlockEnd - Entries[I].Block;
+        BlockInfo[N].Data = Entries[I].Data;
+        Entries[I].Time = 0;
+        N++;
+      }
+    }
+    for (uptr I = 0; I < N; I++)
+      releasePagesToOS(BlockInfo[I].Block, 0, BlockInfo[I].BlockSize,
+                       &BlockInfo[I].Data);
+  }
+
+  struct CachedBlock {
+    uptr Block;
+    uptr BlockEnd;
+    uptr MapBase;
+    uptr MapSize;
+    MapPlatformData Data;
+    u64 Time;
+  };
 
-  void initLinkerInitialized(GlobalStats *S) {
+  HybridMutex Mutex;
+  CachedBlock Entries[MaxEntriesCount];
+  u32 EntriesCount;
+  uptr LargestSize;
+  u32 IsFullEvents;
+  s32 ReleaseToOsIntervalMs;
+};
+
+template <class CacheT> class MapAllocator {
+public:
+  void initLinkerInitialized(GlobalStats *S, s32 ReleaseToOsInterval = -1) {
+    Cache.initLinkerInitialized(ReleaseToOsInterval);
     Stats.initLinkerInitialized();
     if (LIKELY(S))
       S->link(&Stats);
   }
-  void init(GlobalStats *S) {
+  void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) {
     memset(this, 0, sizeof(*this));
-    initLinkerInitialized(S);
+    initLinkerInitialized(S, ReleaseToOsInterval);
   }
 
   void *allocate(uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr,
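Note: the cache keeps the most recently stored block at `Entries[0]` (the previous front entry is demoted into the vacant slot), and `retrieve()` takes the first entry that fits with at most four pages of slack. A compact standalone model of that policy, with illustrative sizes (the real class also tracks mapping info, timestamps, and the full-cache `empty()` heuristic):

    #include <stdint.h>
    #include <stdio.h>

    constexpr uint32_t MaxEntries = 4;
    constexpr uint64_t PageSize = 4096;

    struct Entry {
      uint64_t Block = 0; // 0 means vacant
      uint64_t BlockEnd = 0;
    };
    Entry Entries[MaxEntries];

    bool store(uint64_t Block, uint64_t BlockEnd) {
      for (uint32_t I = 0; I < MaxEntries; I++) {
        if (Entries[I].Block)
          continue;
        if (I != 0)
          Entries[I] = Entries[0]; // demote previous front to the free slot
        Entries[0] = {Block, BlockEnd}; // the new block becomes entry 0
        return true;
      }
      return false; // full; the real cache counts full events, may empty()
    }

    bool retrieve(uint64_t Size, uint64_t *Block) {
      for (uint32_t I = 0; I < MaxEntries; I++) {
        if (!Entries[I].Block)
          continue;
        const uint64_t BlockSize = Entries[I].BlockEnd - Entries[I].Block;
        if (Size > BlockSize)
          continue; // block too small
        if (Size < BlockSize - PageSize * 4)
          continue; // block too large: more than 4 pages would go unused
        *Block = Entries[I].Block;
        Entries[I].Block = 0;
        return true;
      }
      return false;
    }

    int main() {
      store(0x10000, 0x10000 + 8 * PageSize);
      uint64_t B;
      printf("%d\n", retrieve(2 * PageSize, &B)); // 0: too wasteful a fit
      printf("%d\n", retrieve(6 * PageSize, &B)); // 1: fits within the slack
      return 0;
    }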
@@ -79,22 +242,28 @@
 
   void getStats(ScopedString *Str) const;
 
-  void disable() { Mutex.lock(); }
+  void disable() {
+    Mutex.lock();
+    Cache.disable();
+  }
 
-  void enable() { Mutex.unlock(); }
+  void enable() {
+    Cache.enable();
+    Mutex.unlock();
+  }
 
   template <class F> void iterateOverBlocks(F Callback) const {
     for (const auto &H : InUseBlocks)
       Callback(reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize());
   }
 
-  static uptr getMaxFreeListSize(void) { return MaxFreeListSize; }
+  static uptr canCache(uptr Size) { return CacheT::canCache(Size); }
 
 private:
+  CacheT Cache;
+
   HybridMutex Mutex;
   DoublyLinkedList<LargeBlock::Header> InUseBlocks;
-  // The free list is sorted based on the committed size of blocks.
-  DoublyLinkedList<LargeBlock::Header> FreeBlocks;
   uptr AllocatedBytes;
   uptr FreedBytes;
   uptr LargestSize;
@@ -114,35 +283,32 @@
 // For allocations requested with an alignment greater than or equal to a page,
 // the committed memory will amount to something close to Size - AlignmentHint
 // (pending rounding and headers).
-template <uptr MaxFreeListSize>
-void *MapAllocator<MaxFreeListSize>::allocate(uptr Size, uptr AlignmentHint,
-                                              uptr *BlockEnd,
-                                              bool ZeroContents) {
+template <class CacheT>
+void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint,
+                                     uptr *BlockEnd, bool ZeroContents) {
   DCHECK_GE(Size, AlignmentHint);
   const uptr PageSize = getPageSizeCached();
   const uptr RoundedSize =
       roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize);
 
-  if (MaxFreeListSize && AlignmentHint < PageSize) {
-    ScopedLock L(Mutex);
-    for (auto &H : FreeBlocks) {
-      const uptr FreeBlockSize = H.BlockEnd - reinterpret_cast<uptr>(&H);
-      if (FreeBlockSize < RoundedSize)
-        continue;
-      // Candidate free block should only be at most 4 pages larger.
-      if (FreeBlockSize > RoundedSize + 4 * PageSize)
-        break;
-      FreeBlocks.remove(&H);
-      InUseBlocks.push_back(&H);
-      AllocatedBytes += FreeBlockSize;
-      NumberOfAllocs++;
-      Stats.add(StatAllocated, FreeBlockSize);
+  if (AlignmentHint < PageSize && CacheT::canCache(RoundedSize)) {
+    LargeBlock::Header *H;
+    if (Cache.retrieve(RoundedSize, &H)) {
       if (BlockEnd)
-        *BlockEnd = H.BlockEnd;
-      void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(&H) +
+        *BlockEnd = H->BlockEnd;
+      void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(H) +
                                            LargeBlock::getHeaderSize());
       if (ZeroContents)
-        memset(Ptr, 0, H.BlockEnd - reinterpret_cast<uptr>(Ptr));
+        memset(Ptr, 0, H->BlockEnd - reinterpret_cast<uptr>(Ptr));
+      const uptr BlockSize = H->BlockEnd - reinterpret_cast<uptr>(H);
+      {
+        ScopedLock L(Mutex);
+        InUseBlocks.push_back(H);
+        AllocatedBytes += BlockSize;
+        NumberOfAllocs++;
+        Stats.add(StatAllocated, BlockSize);
+        Stats.add(StatMapped, H->MapSize);
+      }
       return Ptr;
     }
   }
@@ -191,6 +357,8 @@
   H->MapSize = MapEnd - MapBase;
   H->BlockEnd = CommitBase + CommitSize;
   H->Data = Data;
+  if (BlockEnd)
+    *BlockEnd = CommitBase + CommitSize;
   {
     ScopedLock L(Mutex);
     InUseBlocks.push_back(H);
@@ -201,52 +369,31 @@
     Stats.add(StatAllocated, CommitSize);
     Stats.add(StatMapped, H->MapSize);
   }
-  if (BlockEnd)
-    *BlockEnd = CommitBase + CommitSize;
   return reinterpret_cast<void *>(Ptr + LargeBlock::getHeaderSize());
 }
 
-template <uptr MaxFreeListSize>
-void MapAllocator<MaxFreeListSize>::deallocate(void *Ptr) {
+template <class CacheT> void MapAllocator<CacheT>::deallocate(void *Ptr) {
   LargeBlock::Header *H = LargeBlock::getHeader(Ptr);
   const uptr Block = reinterpret_cast<uptr>(H);
+  const uptr CommitSize = H->BlockEnd - Block;
   {
     ScopedLock L(Mutex);
     InUseBlocks.remove(H);
-    const uptr CommitSize = H->BlockEnd - Block;
     FreedBytes += CommitSize;
     NumberOfFrees++;
     Stats.sub(StatAllocated, CommitSize);
-    if (MaxFreeListSize && FreeBlocks.size() < MaxFreeListSize) {
-      bool Inserted = false;
-      for (auto &F : FreeBlocks) {
-        const uptr FreeBlockSize = F.BlockEnd - reinterpret_cast<uptr>(&F);
-        if (FreeBlockSize >= CommitSize) {
-          FreeBlocks.insert(H, &F);
-          Inserted = true;
-          break;
-        }
-      }
-      if (!Inserted)
-        FreeBlocks.push_back(H);
-      const uptr RoundedAllocationStart =
-          roundUpTo(Block + LargeBlock::getHeaderSize(), getPageSizeCached());
-      MapPlatformData Data = H->Data;
-      // TODO(kostyak): use release_to_os_interval_ms
-      releasePagesToOS(Block, RoundedAllocationStart - Block,
-                       H->BlockEnd - RoundedAllocationStart, &Data);
-      return;
-    }
     Stats.sub(StatMapped, H->MapSize);
   }
+  if (CacheT::canCache(CommitSize) && Cache.store(H))
+    return;
   void *Addr = reinterpret_cast<void *>(H->MapBase);
   const uptr Size = H->MapSize;
   MapPlatformData Data = H->Data;
   unmap(Addr, Size, UNMAP_ALL, &Data);
 }
 
-template <uptr MaxFreeListSize>
-void MapAllocator<MaxFreeListSize>::getStats(ScopedString *Str) const {
+template <class CacheT>
+void MapAllocator<CacheT>::getStats(ScopedString *Str) const {
   Str->append(
       "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times "
       "(%zuK), remains %zu (%zuK) max %zuM\n",
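Note: callers can now hand the Secondary the same release interval as the Primary (see the combined.h hunk above). A sketch of driving the revamped Secondary directly, mirroring the unit tests further down (assumes the in-tree scudo headers; not part of this patch):

    #include "secondary.h"

    void exerciseSecondary() {
      scudo::MapAllocator<scudo::MapAllocatorCache<>> Allocator;
      // A negative interval disables timed release; with 1000, cached
      // entries older than a second become eligible for release on store().
      Allocator.init(/*S=*/nullptr, /*ReleaseToOsInterval=*/1000);
      void *P = Allocator.allocate(1U << 16);
      // For sizes where canCache() holds, deallocate() stores the block in
      // the cache instead of unmapping it, so a later allocate() of a
      // similar size can be served without a new mmap.
      Allocator.deallocate(P);
      void *Q = Allocator.allocate(1U << 16); // likely served from the cache
      Allocator.deallocate(Q);
    }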
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -347,7 +347,7 @@
   // Tiny allocator, its Primary only serves chunks of 1024 bytes.
   using DeathSizeClassMap = scudo::SizeClassMap<1U, 10U, 10U, 10U, 1U, 10U>;
   typedef scudo::SizeClassAllocator64<DeathSizeClassMap, 20U> Primary;
-  typedef scudo::MapAllocator<0U> Secondary;
+  typedef scudo::MapAllocator<scudo::MapAllocatorNoCache> Secondary;
   template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U>;
 };
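Note: `DeathConfig` deliberately picks `MapAllocatorNoCache` so that a freed Secondary block is unmapped immediately, which keeps use-after-free death tests deterministic. A sketch of that pattern (hypothetical test name, mirroring testSecondaryBasic below):

    TEST(ScudoSecondaryTest, NoCacheUnmapsOnFree) {
      using AllocatorT = scudo::MapAllocator<scudo::MapAllocatorNoCache>;
      AllocatorT Allocator;
      Allocator.init(/*S=*/nullptr);
      void *P = Allocator.allocate(1U << 20);
      Allocator.deallocate(P); // canCache() is always false: unmapped now
      EXPECT_DEATH(memset(P, 'A', 1U), ""); // the mapping is gone
    }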
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -29,8 +29,8 @@
   memset(P, 'A', Size);
   EXPECT_GE(SecondaryT::getBlockSize(P), Size);
   L->deallocate(P);
-  // If we are not using a free list, blocks are unmapped on deallocation.
-  if (SecondaryT::getMaxFreeListSize() == 0U)
+  // If the Secondary can't cache that pointer, it will be unmapped.
+  if (!SecondaryT::canCache(Size))
     EXPECT_DEATH(memset(P, 'A', Size), "");
 
   const scudo::uptr Align = 1U << 16;
@@ -55,17 +55,18 @@
 }
 
 TEST(ScudoSecondaryTest, SecondaryBasic) {
-  testSecondaryBasic<scudo::MapAllocator<0U>>();
+  testSecondaryBasic<scudo::MapAllocator<scudo::MapAllocatorNoCache>>();
 #if !SCUDO_FUCHSIA
-  testSecondaryBasic<scudo::MapAllocator<>>();
-  testSecondaryBasic<scudo::MapAllocator<64U>>();
+  testSecondaryBasic<scudo::MapAllocator<scudo::MapAllocatorCache<>>>();
+  testSecondaryBasic<
+      scudo::MapAllocator<scudo::MapAllocatorCache<64U, 1UL << 20>>>();
 #endif
 }
 
 #if SCUDO_FUCHSIA
-using LargeAllocator = scudo::MapAllocator<0U>;
+using LargeAllocator = scudo::MapAllocator<scudo::MapAllocatorNoCache>;
 #else
-using LargeAllocator = scudo::MapAllocator<>;
+using LargeAllocator = scudo::MapAllocator<scudo::MapAllocatorCache<>>;
 #endif
 
 // This exercises a variety of combinations of size and alignment for the