diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h
--- a/compiler-rt/lib/scudo/standalone/allocator_config.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.h
@@ -48,9 +48,10 @@
   typedef SizeClassAllocator32 Primary;
 #endif
   // Cache blocks up to 2MB
-  typedef MapAllocator> Secondary;
+  typedef MapAllocator>
+      Secondary;
   template
-  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 2 TSDs.
+  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs.
 };
 
 struct AndroidSvelteConfig {
@@ -62,9 +63,9 @@
   // 64KB regions
   typedef SizeClassAllocator32 Primary;
 #endif
-  typedef MapAllocator> Secondary;
+  typedef MapAllocator> Secondary;
   template
-  using TSDRegistryT = TSDRegistrySharedT; // Shared, only 1 TSD.
+  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 2 TSDs.
 };
 
 #if SCUDO_CAN_USE_PRIMARY64
@@ -73,7 +74,7 @@
   typedef SizeClassAllocator64 Primary;
   typedef MapAllocator Secondary;
   template
-  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs.
+  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs.
 };
 #endif

diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -41,8 +41,6 @@
 
 namespace scudo {
 
-enum class Option { ReleaseInterval, MemtagTuning };
-
 template class Allocator {
 public:
@@ -277,7 +275,7 @@
   }
 #endif // GWP_ASAN_HOOKS
 
-    FillContentsMode FillContents =
+    const FillContentsMode FillContents =
         ZeroContents ? ZeroFill : Options.FillContents;
 
     if (UNLIKELY(Alignment > MaxAlignment)) {
@@ -285,7 +283,7 @@
         return nullptr;
       reportAlignmentTooBig(Alignment, MaxAlignment);
     }
-    if (Alignment < MinAlignment)
+    if (UNLIKELY(Alignment < MinAlignment))
       Alignment = MinAlignment;
 
     // If the requested size happens to be 0 (more common than you might think),
@@ -322,13 +320,11 @@
       if (UNLIKELY(!Block)) {
         while (ClassId < SizeClassMap::LargestClassId) {
           Block = TSD->Cache.allocate(++ClassId);
-          if (LIKELY(Block)) {
+          if (LIKELY(Block))
             break;
-          }
         }
-        if (UNLIKELY(!Block)) {
+        if (UNLIKELY(!Block))
           ClassId = 0;
-        }
       }
       if (UnlockRequired)
         TSD->unlock();
@@ -349,7 +345,7 @@
     void *Ptr = reinterpret_cast<void *>(UserPtr);
     void *TaggedPtr = Ptr;
-    if (ClassId) {
+    if (LIKELY(ClassId)) {
       // We only need to zero or tag the contents for Primary backed
       // allocations. We only set tags for primary allocations in order to avoid
       // faulting potentially large numbers of pages for large secondary
@@ -692,11 +688,7 @@
   }
 
   bool setOption(Option O, sptr Value) {
-    if (O == Option::ReleaseInterval) {
-      Primary.setReleaseToOsIntervalMs(static_cast<s32>(Value));
-      Secondary.setReleaseToOsIntervalMs(static_cast<s32>(Value));
-      return true;
-    }
+    initThreadMaybe();
     if (O == Option::MemtagTuning) {
       // Enabling odd/even tags involves a tradeoff between use-after-free
       // detection and buffer overflow detection. Odd/even tags make it more
      // use-after-free is less likely to be detected because the tag space for
      // any particular chunk is cut in half. Therefore we use this tuning
      // setting to control whether odd/even tags are enabled.
-      if (Value == M_MEMTAG_TUNING_BUFFER_OVERFLOW) {
+      if (Value == M_MEMTAG_TUNING_BUFFER_OVERFLOW)
         Options.UseOddEvenTags = true;
-        return true;
-      }
-      if (Value == M_MEMTAG_TUNING_UAF) {
+      else if (Value == M_MEMTAG_TUNING_UAF)
         Options.UseOddEvenTags = false;
-        return true;
-      }
+      return true;
+    } else {
+      // We leave it to the various sub-components to decide whether or not
+      // they want to handle the option, but we do not want to short-circuit
+      // execution if one of the setOption calls were to return false.
+      const bool PrimaryResult = Primary.setOption(O, Value);
+      const bool SecondaryResult = Secondary.setOption(O, Value);
+      const bool RegistryResult = TSDRegistry.setOption(O, Value);
+      return PrimaryResult && SecondaryResult && RegistryResult;
     }
     return false;
   }
@@ -805,8 +802,7 @@
       PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr);
 
   auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool {
-    if (Addr < MemoryAddr ||
-        Addr + archMemoryTagGranuleSize() < Addr ||
+    if (Addr < MemoryAddr || Addr + archMemoryTagGranuleSize() < Addr ||
         Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize)
       return false;
     *Data = &Memory[Addr - MemoryAddr];
@@ -950,10 +946,10 @@
   u32 Cookie;
 
   struct {
-    u8 MayReturnNull : 1;       // may_return_null
+    u8 MayReturnNull : 1;              // may_return_null
     FillContentsMode FillContents : 2; // zero_contents, pattern_fill_contents
-    u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch
-    u8 DeleteSizeMismatch : 1;  // delete_size_mismatch
+    u8 DeallocTypeMismatch : 1;        // dealloc_type_mismatch
+    u8 DeleteSizeMismatch : 1;         // delete_size_mismatch
     u8 TrackAllocationStacks : 1;
     u8 UseOddEvenTags : 1;
     u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size
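Note: in the setOption dispatch above, the three results are deliberately
stored in locals rather than written as a single
`return Primary.setOption(...) && Secondary.setOption(...) && ...`, which
would stop at the first failure. A minimal standalone sketch of the
difference, using two hypothetical sub-components (illustrative only, not
part of the patch):

    #include <cassert>

    struct Sub {
      bool Called = false;
      bool setOption(bool Result) { // Stand-in for a sub-component setOption.
        Called = true;
        return Result;
      }
    };

    int main() {
      Sub A, B;
      // Short-circuiting: when A rejects the option, B never sees it.
      const bool R1 = A.setOption(false) && B.setOption(true);
      assert(!R1 && !B.Called);
      // The patch's pattern: every sub-component sees the option first, and
      // the results are only combined afterwards.
      B.Called = false;
      const bool AResult = A.setOption(false);
      const bool BResult = B.setOption(true);
      assert(!(AResult && BResult) && B.Called);
      return 0;
    }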
diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h
--- a/compiler-rt/lib/scudo/standalone/common.h
+++ b/compiler-rt/lib/scudo/standalone/common.h
@@ -182,6 +182,14 @@
   uptr RegionEnd;
 };
 
+enum class Option : u8 {
+  ReleaseInterval,      // Release to OS interval in milliseconds.
+  MemtagTuning,         // Whether to tune tagging for UAF or overflow.
+  MaxCacheEntriesCount, // Maximum number of blocks that can be cached.
+  MaxCacheEntrySize,    // Maximum size of a block that can be cached.
+  MaxTSDsCount,         // Number of usable TSDs for the shared registry.
+};
+
 constexpr unsigned char PatternFillByte = 0xAB;
 
 enum FillContentsMode {

diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
--- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
+++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
@@ -121,6 +121,18 @@
 #define M_MEMTAG_TUNING -102
 #endif
 
+#ifndef M_CACHE_COUNT_MAX
+#define M_CACHE_COUNT_MAX -200
+#endif
+
+#ifndef M_CACHE_SIZE_MAX
+#define M_CACHE_SIZE_MAX -201
+#endif
+
+#ifndef M_TSDS_COUNT_MAX
+#define M_TSDS_COUNT_MAX -202
+#endif
+
 enum scudo_memtag_tuning {
   // Tune for buffer overflows.
   M_MEMTAG_TUNING_BUFFER_OVERFLOW,
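Note: the M_CACHE_COUNT_MAX, M_CACHE_SIZE_MAX and M_TSDS_COUNT_MAX constants
mirror the scudo::Option values added to common.h above. A sketch of how a
mallopt(3)-style entry point could forward them, assuming a global combined
`Allocator` instance; this is an illustration of the mapping, not the actual
scudo wrappers_c implementation:

    // Hypothetical wrapper; the name and the Allocator global are assumed.
    extern "C" int sketch_mallopt(int Param, int Value) {
      scudo::Option O;
      switch (Param) {
      case M_CACHE_COUNT_MAX:
        O = scudo::Option::MaxCacheEntriesCount;
        break;
      case M_CACHE_SIZE_MAX:
        O = scudo::Option::MaxCacheEntrySize;
        break;
      case M_TSDS_COUNT_MAX:
        O = scudo::Option::MaxTSDsCount;
        break;
      default:
        return 0; // Unknown parameter: report failure, mallopt-style.
      }
      return Allocator.setOption(O, static_cast<scudo::sptr>(Value)) ? 1 : 0;
    }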
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -86,7 +86,7 @@
       if (Sci->CanRelease)
         Sci->ReleaseInfo.LastReleaseAtNs = Time;
     }
-    setReleaseToOsIntervalMs(ReleaseToOsInterval);
+    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
   void init(s32 ReleaseToOsInterval) {
     memset(this, 0, sizeof(*this));
@@ -184,13 +184,16 @@
     getStats(Str, I, 0);
   }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    if (Interval >= MaxReleaseToOsIntervalMs) {
-      Interval = MaxReleaseToOsIntervalMs;
-    } else if (Interval <= MinReleaseToOsIntervalMs) {
-      Interval = MinReleaseToOsIntervalMs;
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::ReleaseInterval) {
+      const s32 Interval =
+          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
+              MinReleaseToOsIntervalMs);
+      atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+      return true;
     }
-    atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+    // Not supported by the Primary, but not an error either.
+    return true;
   }
 
   uptr releaseToOS() {
@@ -423,10 +426,6 @@
                 AvailableChunks, Rss >> 10, Sci->ReleaseInfo.RangesReleased);
   }
 
-  s32 getReleaseToOsIntervalMs() {
-    return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
-  }
-
   NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId,
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
@@ -457,7 +456,8 @@
     }
 
     if (!Force) {
-      const s32 IntervalMs = getReleaseToOsIntervalMs();
+      const s32 IntervalMs =
+          atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
       if (IntervalMs < 0)
         return 0;
       if (Sci->ReleaseInfo.LastReleaseAtNs +

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -91,7 +91,7 @@
       if (Region->CanRelease)
         Region->ReleaseInfo.LastReleaseAtNs = Time;
     }
-    setReleaseToOsIntervalMs(ReleaseToOsInterval);
+    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
 
     if (SupportsMemoryTagging)
       UseMemoryTagging = systemSupportsMemoryTagging();
@@ -185,13 +185,16 @@
     getStats(Str, I, 0);
   }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    if (Interval >= MaxReleaseToOsIntervalMs) {
-      Interval = MaxReleaseToOsIntervalMs;
-    } else if (Interval <= MinReleaseToOsIntervalMs) {
-      Interval = MinReleaseToOsIntervalMs;
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::ReleaseInterval) {
+      const s32 Interval =
+          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
+              MinReleaseToOsIntervalMs);
+      atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+      return true;
     }
-    atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+    // Not supported by the Primary, but not an error either.
+    return true;
   }
 
   uptr releaseToOS() {
@@ -435,10 +438,6 @@
                 getRegionBaseByClassId(ClassId));
   }
 
-  s32 getReleaseToOsIntervalMs() {
-    return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
-  }
-
   NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
@@ -469,7 +468,8 @@
     }
 
     if (!Force) {
-      const s32 IntervalMs = getReleaseToOsIntervalMs();
+      const s32 IntervalMs =
+          atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
       if (IntervalMs < 0)
         return 0;
       if (Region->ReleaseInfo.LastReleaseAtNs +

diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -56,14 +56,21 @@
     return false;
   }
   bool store(UNUSED LargeBlock::Header *H) { return false; }
-  static bool canCache(UNUSED uptr Size) { return false; }
+  bool canCache(UNUSED uptr Size) { return false; }
   void disable() {}
   void enable() {}
   void releaseToOS() {}
-  void setReleaseToOsIntervalMs(UNUSED s32 Interval) {}
+  bool setOption(Option O, UNUSED sptr Value) {
+    if (O == Option::ReleaseInterval || O == Option::MaxCacheEntriesCount ||
+        O == Option::MaxCacheEntrySize)
+      return false;
+    // Not supported by the Secondary Cache, but not an error either.
+    return true;
+  }
 };
 
-template
+template
 class MapAllocatorCache {
@@ -71,10 +78,17 @@
   // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length
   // arrays are an extension for some compilers.
   // FIXME(kostyak): support (partially) the cache on Fuchsia.
-  static_assert(!SCUDO_FUCHSIA || MaxEntriesCount == 0U, "");
+  static_assert(!SCUDO_FUCHSIA || EntriesArraySize == 0U, "");
+
+  // Ensure the default maximum specified fits the array.
+  static_assert(DefaultMaxEntriesCount <= EntriesArraySize, "");
 
   void initLinkerInitialized(s32 ReleaseToOsInterval) {
-    setReleaseToOsIntervalMs(ReleaseToOsInterval);
+    setOption(Option::MaxCacheEntriesCount,
+              static_cast<sptr>(DefaultMaxEntriesCount));
+    setOption(Option::MaxCacheEntrySize,
+              static_cast<sptr>(DefaultMaxEntrySize));
+    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
   void init(s32 ReleaseToOsInterval) {
     memset(this, 0, sizeof(*this));
@@ -85,13 +99,14 @@
     bool EntryCached = false;
     bool EmptyCache = false;
     const u64 Time = getMonotonicTime();
+    const u32 MaxCount = atomic_load(&MaxEntriesCount, memory_order_relaxed);
     {
       ScopedLock L(Mutex);
-      if (EntriesCount == MaxEntriesCount) {
+      if (EntriesCount >= MaxCount) {
         if (IsFullEvents++ == 4U)
           EmptyCache = true;
       } else {
-        for (uptr I = 0; I < MaxEntriesCount; I++) {
+        for (u32 I = 0; I < MaxCount; I++) {
           if (Entries[I].Block)
             continue;
           if (I != 0)
@@ -111,17 +126,19 @@
     s32 Interval;
     if (EmptyCache)
       empty();
-    else if ((Interval = getReleaseToOsIntervalMs()) >= 0)
+    else if ((Interval = atomic_load(&ReleaseToOsIntervalMs,
+                                     memory_order_relaxed)) >= 0)
       releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000);
     return EntryCached;
   }
 
   bool retrieve(uptr Size, LargeBlock::Header **H) {
     const uptr PageSize = getPageSizeCached();
+    const u32 MaxCount = atomic_load(&MaxEntriesCount, memory_order_relaxed);
     ScopedLock L(Mutex);
     if (EntriesCount == 0)
       return false;
-    for (uptr I = 0; I < MaxEntriesCount; I++) {
+    for (u32 I = 0; I < MaxCount; I++) {
       if (!Entries[I].Block)
         continue;
       const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block;
@@ -141,17 +158,31 @@
     return false;
   }
 
-  static bool canCache(uptr Size) {
-    return MaxEntriesCount != 0U && Size <= MaxEntrySize;
+  bool canCache(uptr Size) {
+    return atomic_load(&MaxEntriesCount, memory_order_relaxed) != 0U &&
+           Size <= atomic_load(&MaxEntrySize, memory_order_relaxed);
   }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    if (Interval >= MaxReleaseToOsIntervalMs) {
-      Interval = MaxReleaseToOsIntervalMs;
-    } else if (Interval <= MinReleaseToOsIntervalMs) {
-      Interval = MinReleaseToOsIntervalMs;
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::ReleaseInterval) {
+      const s32 Interval =
+          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
+              MinReleaseToOsIntervalMs);
+      atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+      return true;
+    } else if (O == Option::MaxCacheEntriesCount) {
+      const u32 MaxCount = static_cast<u32>(Value);
+      if (MaxCount > EntriesArraySize)
+        return false;
+      atomic_store(&MaxEntriesCount, MaxCount, memory_order_relaxed);
+      return true;
+    } else if (O == Option::MaxCacheEntrySize) {
+      atomic_store(&MaxEntrySize, static_cast<uptr>(Value),
+                   memory_order_relaxed);
+      return true;
    }
-    atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+    // Not supported by the Secondary Cache, but not an error either.
+    return true;
  }
 
   void releaseToOS() { releaseOlderThan(UINT64_MAX); }
@@ -166,11 +197,11 @@
       void *MapBase;
       uptr MapSize;
       MapPlatformData Data;
-    } MapInfo[MaxEntriesCount];
+    } MapInfo[EntriesArraySize];
     uptr N = 0;
     {
       ScopedLock L(Mutex);
-      for (uptr I = 0; I < MaxEntriesCount; I++) {
+      for (uptr I = 0; I < EntriesArraySize; I++) {
         if (!Entries[I].Block)
           continue;
         MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase);
@@ -191,7 +222,7 @@
     ScopedLock L(Mutex);
     if (!EntriesCount)
       return;
-    for (uptr I = 0; I < MaxEntriesCount; I++) {
+    for (uptr I = 0; I < EntriesArraySize; I++) {
       if (!Entries[I].Block || !Entries[I].Time || Entries[I].Time > Time)
         continue;
       releasePagesToOS(Entries[I].Block, 0,
     }
   }
@@ -201,10 +232,6 @@
-  s32 getReleaseToOsIntervalMs() {
-    return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
-  }
-
   struct CachedBlock {
     uptr Block;
     uptr BlockEnd;
@@ -215,8 +242,10 @@
   };
 
   HybridMutex Mutex;
-  CachedBlock Entries[MaxEntriesCount];
+  CachedBlock Entries[EntriesArraySize];
   u32 EntriesCount;
+  atomic_u32 MaxEntriesCount;
+  atomic_uptr MaxEntrySize;
   uptr LargestSize;
   u32 IsFullEvents;
   atomic_s32 ReleaseToOsIntervalMs;
@@ -265,11 +294,9 @@
         Callback(reinterpret_cast(&H) + LargeBlock::getHeaderSize());
   }
 
-  static uptr canCache(uptr Size) { return CacheT::canCache(Size); }
+  uptr canCache(uptr Size) { return Cache.canCache(Size); }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    Cache.setReleaseToOsIntervalMs(Interval);
-  }
+  bool setOption(Option O, sptr Value) { return Cache.setOption(O, Value); }
 
   void releaseToOS() { Cache.releaseToOS(); }
 
@@ -306,7 +333,7 @@
   const uptr RoundedSize =
       roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize);
 
-  if (AlignmentHint < PageSize && CacheT::canCache(RoundedSize)) {
+  if (AlignmentHint < PageSize && Cache.canCache(RoundedSize)) {
     LargeBlock::Header *H;
     if (Cache.retrieve(RoundedSize, &H)) {
       if (BlockEnd)
@@ -400,7 +427,7 @@
     Stats.sub(StatAllocated, CommitSize);
     Stats.sub(StatMapped, H->MapSize);
   }
-  if (CacheT::canCache(CommitSize) && Cache.store(H))
+  if (Cache.canCache(CommitSize) && Cache.store(H))
     return;
   void *Addr = reinterpret_cast<void *>(H->MapBase);
   const uptr Size = H->MapSize;

diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -19,7 +19,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc;
 
@@ -351,6 +351,7 @@
 }
 
 template static void testAllocatorThreaded() {
+  Ready = false;
   using AllocatorT = TestAllocator;
   auto Allocator = std::unique_ptr(new AllocatorT());
   std::thread Threads[32];
@@ -394,7 +395,7 @@
   typedef scudo::SizeClassAllocator64 Primary;
   typedef scudo::MapAllocator Secondary;
-  template using TSDRegistryT = scudo::TSDRegistrySharedT;
+  template using TSDRegistryT = scudo::TSDRegistrySharedT;
 };
 
 TEST(ScudoCombinedTest, DeathCombined) {

diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -149,7 +149,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 template static void performAllocations(Primary *Allocator) {
   static THREADLOCAL typename Primary::CacheT Cache;
@@ -176,6 +176,7 @@
 }
 
 template static void testPrimaryThreaded() {
+  Ready = false;
   auto Deleter = [](Primary *P) {
     P->unmapTestOnly();
     delete P;

diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -21,7 +21,7 @@
 template static void testSecondaryBasic(void) {
   scudo::GlobalStats S;
   S.init();
-  SecondaryT *L = new SecondaryT;
+  std::unique_ptr L(new SecondaryT);
   L->init(&S);
   const scudo::uptr Size = 1U << 16;
   void *P = L->allocate(Size);
@@ -30,7 +30,7 @@
   EXPECT_GE(SecondaryT::getBlockSize(P), Size);
   L->deallocate(P);
   // If the Secondary can't cache that pointer, it will be unmapped.
-  if (!SecondaryT::canCache(Size))
+  if (!L->canCache(Size))
     EXPECT_DEATH(memset(P, 'A', Size), "");
 
   const scudo::uptr Align = 1U << 16;
@@ -59,7 +59,7 @@
 #if !SCUDO_FUCHSIA
   testSecondaryBasic>>();
   testSecondaryBasic<
-      scudo::MapAllocator>>();
+      scudo::MapAllocator>>();
 #endif
 }
@@ -75,7 +75,7 @@
 TEST(ScudoSecondaryTest, SecondaryCombinations) {
   constexpr scudo::uptr MinAlign = FIRST_32_SECOND_64(8, 16);
   constexpr scudo::uptr HeaderSize = scudo::roundUpTo(8, MinAlign);
-  LargeAllocator *L = new LargeAllocator;
+  std::unique_ptr L(new LargeAllocator);
   L->init(nullptr);
   for (scudo::uptr SizeLog = 0; SizeLog <= 20; SizeLog++) {
     for (scudo::uptr AlignLog = FIRST_32_SECOND_64(3, 4); AlignLog <= 16;
@@ -103,7 +103,7 @@
 }
 
 TEST(ScudoSecondaryTest, SecondaryIterate) {
-  LargeAllocator *L = new LargeAllocator;
+  std::unique_ptr L(new LargeAllocator);
   L->init(nullptr);
   std::vector V;
   const scudo::uptr PageSize = scudo::getPageSizeCached();
@@ -125,9 +125,32 @@
   Str.output();
 }
 
+TEST(ScudoSecondaryTest, SecondaryOptions) {
+  std::unique_ptr L(new LargeAllocator);
+  L->init(nullptr);
+  // Attempt to set a maximum number of entries higher than the array size.
+  EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4096U));
+  // A negative number will be cast to a scudo::u32 and fail.
+  EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, -1));
+  if (L->canCache(0U)) {
+    // Various valid combinations.
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
+    EXPECT_TRUE(L->canCache(1UL << 18));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 17));
+    EXPECT_FALSE(L->canCache(1UL << 18));
+    EXPECT_TRUE(L->canCache(1UL << 16));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 0U));
+    EXPECT_FALSE(L->canCache(1UL << 16));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
+    EXPECT_TRUE(L->canCache(1UL << 16));
+  }
+}
+
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 static void performAllocations(LargeAllocator *L) {
   std::vector V;
@@ -153,11 +176,12 @@
 }
 
 TEST(ScudoSecondaryTest, SecondaryThreadsRace) {
-  LargeAllocator *L = new LargeAllocator;
+  Ready = false;
+  std::unique_ptr L(new LargeAllocator);
   L->init(nullptr, /*ReleaseToOsInterval=*/0);
   std::thread Threads[16];
   for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
-    Threads[I] = std::thread(performAllocations, L);
+    Threads[I] = std::thread(performAllocations, L.get());
   {
     std::unique_lock Lock(Mutex);
     Ready = true;

diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
@@ -13,6 +13,7 @@
 
 #include <condition_variable>
 #include <mutex>
+#include <set>
 #include <thread>
 
 // We mock out an allocator with a TSD registry, mostly using empty stubs. The
@@ -47,12 +48,12 @@
 
 struct OneCache {
   template
-  using TSDRegistryT = scudo::TSDRegistrySharedT;
+  using TSDRegistryT = scudo::TSDRegistrySharedT;
 };
 
 struct SharedCaches {
   template
-  using TSDRegistryT = scudo::TSDRegistrySharedT;
+  using TSDRegistryT = scudo::TSDRegistrySharedT;
 };
 
 struct ExclusiveCaches {
@@ -116,7 +117,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 template static void stressCache(AllocatorT *Allocator) {
   auto Registry = Allocator->getTSDRegistry();
@@ -145,6 +146,7 @@
 }
 
 template static void testRegistryThreaded() {
+  Ready = false;
   auto Deleter = [](AllocatorT *A) {
     A->unmapTestOnly();
     delete A;
@@ -171,3 +173,73 @@
   testRegistryThreaded>();
 #endif
 }
+
+static std::set Pointers;
+
+static void stressSharedRegistry(MockAllocator *Allocator) {
+  std::set Set;
+  auto Registry = Allocator->getTSDRegistry();
+  {
+    std::unique_lock Lock(Mutex);
+    while (!Ready)
+      Cv.wait(Lock);
+  }
+  Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false);
+  bool UnlockRequired;
+  for (scudo::uptr I = 0; I < 4096U; I++) {
+    auto TSD = Registry->getTSDAndLock(&UnlockRequired);
+    EXPECT_NE(TSD, nullptr);
+    Set.insert(reinterpret_cast(TSD));
+    if (UnlockRequired)
+      TSD->unlock();
+  }
+  {
+    std::unique_lock Lock(Mutex);
+    Pointers.insert(Set.begin(), Set.end());
+  }
+}
+
+TEST(ScudoTSDTest, TSDRegistryTSDsCount) {
+  Ready = false;
+  using AllocatorT = MockAllocator;
+  auto Deleter = [](AllocatorT *A) {
+    A->unmapTestOnly();
+    delete A;
+  };
+  std::unique_ptr Allocator(new AllocatorT, Deleter);
+  Allocator->reset();
+  // We attempt to use as many TSDs as the shared cache offers by creating a
+  // decent amount of threads that will be run concurrently and attempt to get
+  // and lock TSDs. We put them all in a set and count the number of entries
+  // after we are done.
+  std::thread Threads[32];
+  for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
+    Threads[I] = std::thread(stressSharedRegistry, Allocator.get());
+  {
+    std::unique_lock Lock(Mutex);
+    Ready = true;
+    Cv.notify_all();
+  }
+  for (auto &T : Threads)
+    T.join();
+  // The initial number of TSDs we get will be the minimum of the default count
+  // and the number of CPUs.
+  EXPECT_LE(Pointers.size(), 8U);
+  Pointers.clear();
+  auto Registry = Allocator->getTSDRegistry();
+  // Increase the number of TSDs to 16.
+  Registry->setOption(scudo::Option::MaxTSDsCount, 16);
+  Ready = false;
+  for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
+    Threads[I] = std::thread(stressSharedRegistry, Allocator.get());
+  {
+    std::unique_lock Lock(Mutex);
+    Ready = true;
+    Cv.notify_all();
+  }
+  for (auto &T : Threads)
+    T.join();
+  // We should get 16 distinct TSDs back.
+  EXPECT_EQ(Pointers.size(), 16U);
+}

diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
@@ -389,6 +389,7 @@
 
 TEST(ScudoWrappersCTest, DisableForkEnable) {
   pthread_t ThreadId;
+  Ready = false;
   EXPECT_EQ(pthread_create(&ThreadId, nullptr, &enableMalloc, nullptr), 0);
 
   // Wait for the thread to be warmed up.

diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
@@ -79,7 +79,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 static void stressNew() {
   std::vector V;
@@ -103,6 +103,7 @@
 }
 
 TEST(ScudoWrappersCppTest, ThreadedNew) {
+  Ready = false;
  std::thread Threads[32];
  for (size_t I = 0U; I < sizeof(Threads) / sizeof(Threads[0]); I++)
    Threads[I] = std::thread(stressNew);

diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -66,6 +66,12 @@
     Mutex.unlock();
   }
 
+  bool setOption(Option O, UNUSED sptr Value) {
+    if (O == Option::MaxTSDsCount)
+      return false;
+    return true;
+  }
+
 private:
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);

diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -14,31 +14,16 @@
 
 namespace scudo {
 
-template struct TSDRegistrySharedT {
+template
+struct TSDRegistrySharedT {
   void initLinkerInitialized(Allocator *Instance) {
     Instance->initLinkerInitialized();
     CHECK_EQ(pthread_key_create(&PThreadKey, nullptr), 0); // For non-TLS
-    const u32 NumberOfCPUs = getNumberOfCPUs();
-    NumberOfTSDs = (SCUDO_ANDROID || NumberOfCPUs == 0)
-                       ? MaxTSDCount
-                       : Min(NumberOfCPUs, MaxTSDCount);
-    for (u32 I = 0; I < NumberOfTSDs; I++)
+    for (u32 I = 0; I < TSDsArraySize; I++)
       TSDs[I].initLinkerInitialized(Instance);
-    // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
-    // array of TSDs in a random order. For details, see:
-    // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
-    for (u32 I = 0; I < NumberOfTSDs; I++) {
-      u32 A = I + 1;
-      u32 B = NumberOfTSDs;
-      // Find the GCD between I + 1 and NumberOfTSDs. If 1, they are coprimes.
-      while (B != 0) {
-        const u32 T = A;
-        A = B;
-        B = T % B;
-      }
-      if (A == 1)
-        CoPrimes[NumberOfCoPrimes++] = I + 1;
-    }
+    const u32 NumberOfCPUs = getNumberOfCPUs();
+    setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
+                                        : Min(NumberOfCPUs, DefaultTSDCount));
     Initialized = true;
   }
   void init(Allocator *Instance) {
@@ -66,21 +51,34 @@
     if (TSD->tryLock())
       return TSD;
     // If that fails, go down the slow path.
+    if (TSDsArraySize == 1U) {
+      // Only 1 TSD, no need to go any further.
+      // The compiler will optimize this one way or the other.
+      TSD->lock();
+      return TSD;
+    }
     return getTSDAndLockSlow(TSD);
   }
 
   void disable() {
     Mutex.lock();
-    for (u32 I = 0; I < NumberOfTSDs; I++)
+    for (u32 I = 0; I < TSDsArraySize; I++)
       TSDs[I].lock();
   }
 
   void enable() {
-    for (s32 I = static_cast<s32>(NumberOfTSDs - 1); I >= 0; I--)
+    for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--)
       TSDs[I].unlock();
     Mutex.unlock();
   }
 
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::MaxTSDsCount)
+      return setNumberOfTSDs(static_cast<u32>(Value));
+    // Not supported by the TSD Registry, but not an error either.
+    return true;
+  }
+
 private:
   ALWAYS_INLINE void setCurrentTSD(TSD *CurrentTSD) {
 #if _BIONIC
@@ -104,6 +102,32 @@
 #endif
   }
 
+  bool setNumberOfTSDs(u32 N) {
+    ScopedLock L(MutexTSDs);
+    if (N < NumberOfTSDs)
+      return false;
+    if (N > TSDsArraySize)
+      N = TSDsArraySize;
+    NumberOfTSDs = N;
+    NumberOfCoPrimes = 0;
+    // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
+    // array of TSDs in a random order. For details, see:
+    // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
+    for (u32 I = 0; I < N; I++) {
+      u32 A = I + 1;
+      u32 B = N;
+      // Find the GCD between I + 1 and N. If 1, they are coprimes.
+      while (B != 0) {
+        const u32 T = A;
+        A = B;
+        B = T % B;
+      }
+      if (A == 1)
+        CoPrimes[NumberOfCoPrimes++] = I + 1;
+    }
+    return true;
+  }
+
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
     if (LIKELY(Initialized))
@@ -120,17 +144,23 @@
   }
 
   NOINLINE TSD *getTSDAndLockSlow(TSD *CurrentTSD) {
-    if (MaxTSDCount > 1U && NumberOfTSDs > 1U) {
-      // Use the Precedence of the current TSD as our random seed. Since we are
-      // in the slow path, it means that tryLock failed, and as a result it's
-      // very likely that said Precedence is non-zero.
-      const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
-      const u32 Inc = CoPrimes[R % NumberOfCoPrimes];
-      u32 Index = R % NumberOfTSDs;
+    // Use the Precedence of the current TSD as our random seed. Since we are
+    // in the slow path, it means that tryLock failed, and as a result it's
+    // very likely that said Precedence is non-zero.
+    const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
+    u32 N, Inc;
+    {
+      ScopedLock L(MutexTSDs);
+      N = NumberOfTSDs;
+      DCHECK_NE(NumberOfCoPrimes, 0U);
+      Inc = CoPrimes[R % NumberOfCoPrimes];
+    }
+    if (N > 1U) {
+      u32 Index = R % N;
       uptr LowestPrecedence = UINTPTR_MAX;
       TSD *CandidateTSD = nullptr;
       // Go randomly through at most 4 contexts and find a candidate.
-      for (u32 I = 0; I < Min(4U, NumberOfTSDs); I++) {
+      for (u32 I = 0; I < Min(4U, N); I++) {
         if (TSDs[Index].tryLock()) {
           setCurrentTSD(&TSDs[Index]);
           return &TSDs[Index];
@@ -142,8 +172,8 @@
           LowestPrecedence = Precedence;
         }
         Index += Inc;
-        if (Index >= NumberOfTSDs)
-          Index -= NumberOfTSDs;
+        if (Index >= N)
+          Index -= N;
       }
       if (CandidateTSD) {
         CandidateTSD->lock();
@@ -160,19 +190,20 @@
   atomic_u32 CurrentIndex;
   u32 NumberOfTSDs;
   u32 NumberOfCoPrimes;
-  u32 CoPrimes[MaxTSDCount];
+  u32 CoPrimes[TSDsArraySize];
   bool Initialized;
   HybridMutex Mutex;
-  TSD TSDs[MaxTSDCount];
+  HybridMutex MutexTSDs;
+  TSD TSDs[TSDsArraySize];
 #if SCUDO_LINUX && !_BIONIC
   static THREADLOCAL TSD *ThreadTSD;
 #endif
 };
 
 #if SCUDO_LINUX && !_BIONIC
-template
+template
 THREADLOCAL TSD
-    *TSDRegistrySharedT::ThreadTSD;
+    *TSDRegistrySharedT::ThreadTSD;
 #endif
 
 } // namespace scudo
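Note on the coprime walk in getTSDAndLockSlow: stepping the index by a value
coprime with NumberOfTSDs (mod NumberOfTSDs) visits every TSD exactly once
before the sequence repeats, which is why setNumberOfTSDs precomputes the
coprimes of N. A standalone worked example (an illustration, not scudo code):

    #include <cstdio>

    int main() {
      const unsigned N = 8;   // Number of slots, e.g. NumberOfTSDs.
      const unsigned Inc = 3; // gcd(3, 8) == 1, so 3 is a valid increment.
      unsigned Index = 5;     // Arbitrary start, e.g. the seed modulo N.
      for (unsigned I = 0; I < N; I++) {
        std::printf("%u ", Index); // Prints "5 0 3 6 1 4 7 2": each slot once.
        Index += Inc;
        if (Index >= N)
          Index -= N;
      }
      std::printf("\n");
      return 0;
    }

An increment sharing a factor with N would only cycle over a subset instead
(e.g. Inc = 2 with N = 8 touches only the even slots), which is exactly what
the GCD check in setNumberOfTSDs rules out.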