Index: compiler-rt/trunk/lib/scudo/standalone/CMakeLists.txt
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/CMakeLists.txt
+++ compiler-rt/trunk/lib/scudo/standalone/CMakeLists.txt
@@ -57,10 +57,12 @@
 endif()
 
 set(SCUDO_HEADERS
+  allocator_config.h
   atomic_helpers.h
   bytemap.h
   checksum.h
   chunk.h
+  combined.h
   flags.h
   flags_parser.h
   fuchsia.h
Index: compiler-rt/trunk/lib/scudo/standalone/allocator_config.h
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/allocator_config.h
+++ compiler-rt/trunk/lib/scudo/standalone/allocator_config.h
@@ -0,0 +1,80 @@
+//===-- allocator_config.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_ALLOCATOR_CONFIG_H_
+#define SCUDO_ALLOCATOR_CONFIG_H_
+
+#include "combined.h"
+#include "common.h"
+#include "flags.h"
+#include "primary32.h"
+#include "primary64.h"
+#include "size_class_map.h"
+#include "tsd_exclusive.h"
+#include "tsd_shared.h"
+
+namespace scudo {
+
+// Default configurations for various platforms.
+
+struct DefaultConfig {
+  using SizeClassMap = DefaultSizeClassMap;
+#if SCUDO_CAN_USE_PRIMARY64
+  // 1GB Regions
+  typedef SizeClassAllocator64<SizeClassMap, 30U> Primary;
+#else
+  // 512KB regions
+  typedef SizeClassAllocator32<SizeClassMap, 19U> Primary;
+#endif
+  template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive
+};
+
+struct AndroidConfig {
+  using SizeClassMap = AndroidSizeClassMap;
+#if SCUDO_CAN_USE_PRIMARY64
+  // 1GB regions
+  typedef SizeClassAllocator64<SizeClassMap, 30U> Primary;
+#else
+  // 512KB regions
+  typedef SizeClassAllocator32<SizeClassMap, 19U> Primary;
+#endif
+  template <class A>
+  using TSDRegistryT = TSDRegistrySharedT<A, 2U>; // Shared, max 2 TSDs.
+};
+
+struct AndroidSvelteConfig {
+  using SizeClassMap = SvelteSizeClassMap;
+#if SCUDO_CAN_USE_PRIMARY64
+  // 512MB regions
+  typedef SizeClassAllocator64<SizeClassMap, 29U> Primary;
+#else
+  // 256KB regions
+  typedef SizeClassAllocator32<SizeClassMap, 18U> Primary;
+#endif
+  template <class A>
+  using TSDRegistryT = TSDRegistrySharedT<A, 1U>; // Shared, only 1 TSD.
+};
+
+struct FuchsiaConfig {
+  // 1GB Regions
+  typedef SizeClassAllocator64<DefaultSizeClassMap, 30U> Primary;
+  template <class A>
+  using TSDRegistryT = TSDRegistrySharedT<A, 8U>; // Shared, max 8 TSDs.
+};
+
+#if SCUDO_ANDROID
+typedef AndroidConfig Config;
+#elif SCUDO_FUCHSIA
+typedef FuchsiaConfig Config;
+#else
+typedef DefaultConfig Config;
+#endif
+
+} // namespace scudo
+
+#endif // SCUDO_ALLOCATOR_CONFIG_H_
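A config is the single template parameter consumed by scudo::Allocator<> in combined.h below: it names a SizeClassMap, a Primary, and a TSDRegistryT alias, and the #if block at the end of the file selects one of these structs as scudo::Config. As a rough sketch of how another platform could plug in its own tuning, assuming the SizeClassAllocator64<SizeClassMap, RegionSizeLog> and TSDRegistrySharedT<Allocator, MaxTSDCount> signatures used by the configs above (ExampleConfig and its parameter values are hypothetical, not part of this patch):

```cpp
// Sketch only: ExampleConfig is hypothetical and not part of this change.
// It assumes the primary and TSD registry templates pulled in by
// allocator_config.h (primary64.h, tsd_shared.h).
#include "allocator_config.h"

namespace scudo {

struct ExampleConfig {
  using SizeClassMap = DefaultSizeClassMap;
  // 256MB regions (2^28), purely illustrative.
  typedef SizeClassAllocator64<SizeClassMap, 28U> Primary;
  // Shared registry with at most 4 TSDs.
  template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 4U>;
};

} // namespace scudo

// The combined allocator would then be instantiated as
//   scudo::Allocator<scudo::ExampleConfig>
// the same way the stock configs are instantiated in combined_test.cc.
```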
Index: compiler-rt/trunk/lib/scudo/standalone/combined.h
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/combined.h
+++ compiler-rt/trunk/lib/scudo/standalone/combined.h
@@ -0,0 +1,550 @@
+//===-- combined.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_COMBINED_H_
+#define SCUDO_COMBINED_H_
+
+#include "chunk.h"
+#include "common.h"
+#include "flags.h"
+#include "flags_parser.h"
+#include "interface.h"
+#include "local_cache.h"
+#include "quarantine.h"
+#include "report.h"
+#include "secondary.h"
+#include "tsd.h"
+
+namespace scudo {
+
+template <class Params> class Allocator {
+public:
+  using PrimaryT = typename Params::Primary;
+  using CacheT = typename PrimaryT::CacheT;
+  typedef Allocator<Params> ThisT;
+  typedef typename Params::template TSDRegistryT<ThisT> TSDRegistryT;
+
+  struct QuarantineCallback {
+    explicit QuarantineCallback(ThisT &Instance, CacheT &LocalCache)
+        : Allocator(Instance), Cache(LocalCache) {}
+
+    // Chunk recycling function, returns a quarantined chunk to the backend,
+    // first making sure it hasn't been tampered with.
+    void recycle(void *Ptr) {
+      Chunk::UnpackedHeader Header;
+      Chunk::loadHeader(Allocator.Cookie, Ptr, &Header);
+      if (UNLIKELY(Header.State != Chunk::State::Quarantined))
+        reportInvalidChunkState(AllocatorAction::Recycling, Ptr);
+
+      Chunk::UnpackedHeader NewHeader = Header;
+      NewHeader.State = Chunk::State::Available;
+      Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header);
+
+      void *BlockBegin = Chunk::getBlockBegin(Ptr, &Header);
+      const uptr ClassId = Header.ClassId;
+      if (ClassId)
+        Cache.deallocate(ClassId, BlockBegin);
+      else
+        Allocator.Secondary.deallocate(BlockBegin);
+    }
+
+    // We take a shortcut when allocating a quarantine batch by working with the
+    // appropriate class ID instead of using Size. The compiler should optimize
+    // the class ID computation and work with the associated cache directly.
+    void *allocate(UNUSED uptr Size) {
+      const uptr QuarantineClassId = SizeClassMap::getClassIdBySize(
+          sizeof(QuarantineBatch) + Chunk::getHeaderSize());
+      void *Ptr = Cache.allocate(QuarantineClassId);
+      // Quarantine batch allocation failure is fatal.
+      if (UNLIKELY(!Ptr))
+        reportOutOfMemory(SizeClassMap::getSizeByClassId(QuarantineClassId));
+
+      Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) +
+                                     Chunk::getHeaderSize());
+      Chunk::UnpackedHeader Header = {};
+      Header.ClassId = QuarantineClassId & Chunk::ClassIdMask;
+      Header.SizeOrUnusedBytes = sizeof(QuarantineBatch);
+      Header.State = Chunk::State::Allocated;
+      Chunk::storeHeader(Allocator.Cookie, Ptr, &Header);
+
+      return Ptr;
+    }
+
+    void deallocate(void *Ptr) {
+      const uptr QuarantineClassId = SizeClassMap::getClassIdBySize(
+          sizeof(QuarantineBatch) + Chunk::getHeaderSize());
+      Chunk::UnpackedHeader Header;
+      Chunk::loadHeader(Allocator.Cookie, Ptr, &Header);
+
+      if (UNLIKELY(Header.State != Chunk::State::Allocated))
+        reportInvalidChunkState(AllocatorAction::Deallocating, Ptr);
+      DCHECK_EQ(Header.ClassId, QuarantineClassId);
+      DCHECK_EQ(Header.Offset, 0);
+      DCHECK_EQ(Header.SizeOrUnusedBytes, sizeof(QuarantineBatch));
+
+      Chunk::UnpackedHeader NewHeader = Header;
+      NewHeader.State = Chunk::State::Available;
+      Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header);
+      Cache.deallocate(QuarantineClassId,
+                       reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) -
+                                                Chunk::getHeaderSize()));
+    }
+
+  private:
+    ThisT &Allocator;
+    CacheT &Cache;
+  };
+
+  typedef GlobalQuarantine<QuarantineCallback, void *> QuarantineT;
+  typedef typename QuarantineT::CacheT QuarantineCacheT;
+
+  void initLinkerInitialized() {
+    performSanityChecks();
+
+    // Check if hardware CRC32 is supported in the binary and by the platform,
+    // if so, opt for the CRC32 hardware version of the checksum.
+    if (&computeHardwareCRC32 && hasHardwareCRC32())
+      HashAlgorithm = Checksum::HardwareCRC32;
+
+    if (UNLIKELY(!getRandom(&Cookie, sizeof(Cookie))))
+      Cookie = static_cast<u32>(getMonotonicTime() ^
+                                (reinterpret_cast<uptr>(this) >> 4));
+
+    initFlags();
+    reportUnrecognizedFlags();
+
+    // Store some flags locally.
+    Options.MayReturnNull = getFlags()->may_return_null;
+    Options.ZeroContents = getFlags()->zero_contents;
+    Options.DeallocTypeMismatch = getFlags()->dealloc_type_mismatch;
+    Options.DeleteSizeMismatch = getFlags()->delete_size_mismatch;
+    Options.QuarantineMaxChunkSize = getFlags()->quarantine_max_chunk_size;
+
+    Stats.initLinkerInitialized();
+    Primary.initLinkerInitialized(getFlags()->release_to_os_interval_ms);
+    Secondary.initLinkerInitialized(&Stats);
+
+    Quarantine.init(getFlags()->quarantine_size_kb << 10,
+                    getFlags()->thread_local_quarantine_size_kb << 10);
+  }
+
+  void reset() { memset(this, 0, sizeof(*this)); }
+
+  void unmapTestOnly() {
+    TSDRegistry.unmapTestOnly();
+    Primary.unmapTestOnly();
+  }
+
+  TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
+
+  void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); }
+
+  // Release the resources used by a TSD, which involves:
+  // - draining the local quarantine cache to the global quarantine;
+  // - releasing the cached pointers back to the Primary;
+  // - unlinking the local stats from the global ones (destroying the cache does
+  //   the last two items).
+  void commitBack(TSD<ThisT> *TSD) {
+    Quarantine.drain(&TSD->QuarantineCache,
+                     QuarantineCallback(*this, TSD->Cache));
+    TSD->Cache.destroy(&Stats);
+  }
+
+  NOINLINE void *allocate(uptr Size, Chunk::Origin Origin,
+                          uptr Alignment = MinAlignment,
+                          bool ZeroContents = false) {
+    initThreadMaybe();
+
+    if (UNLIKELY(Alignment > MaxAlignment)) {
+      if (Options.MayReturnNull)
+        return nullptr;
+      reportAlignmentTooBig(Alignment, MaxAlignment);
+    }
+    if (UNLIKELY(Alignment < MinAlignment))
+      Alignment = MinAlignment;
+
+    // If the requested size happens to be 0 (more common than you might think),
+    // allocate 1 byte on top of the header. Then add the extra bytes required
+    // to fulfill the alignment requirements: we allocate enough to be sure that
+    // there will be an address in the block that will satisfy the alignment.
+    const uptr NeededSize =
+        Chunk::getHeaderSize() + roundUpTo(Size ? Size : 1, MinAlignment) +
+        ((Alignment > MinAlignment) ? (Alignment - Chunk::getHeaderSize()) : 0);
+
+    // Takes care of extravagantly large sizes as well as integer overflows.
+    if (UNLIKELY(Size >= MaxAllowedMallocSize ||
+                 NeededSize >= MaxAllowedMallocSize)) {
+      if (Options.MayReturnNull)
+        return nullptr;
+      reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize);
+    }
+
+    void *Block;
+    uptr ClassId;
+    uptr BlockEnd = 0;
+    if (PrimaryT::canAllocate(NeededSize)) {
+      ClassId = SizeClassMap::getClassIdBySize(NeededSize);
+      bool UnlockRequired;
+      auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
+      Block = TSD->Cache.allocate(ClassId);
+      if (UnlockRequired)
+        TSD->unlock();
+    } else {
+      ClassId = 0;
+      Block = Secondary.allocate(NeededSize, Alignment, &BlockEnd);
+    }
+
+    if (UNLIKELY(!Block)) {
+      if (Options.MayReturnNull)
+        return nullptr;
+      reportOutOfMemory(NeededSize);
+    }
+
+    // We only need to zero the contents for Primary backed allocations.
+    if ((ZeroContents || Options.ZeroContents) && ClassId)
+      memset(Block, 0, PrimaryT::getSizeByClassId(ClassId));
+
+    Chunk::UnpackedHeader Header = {};
+    uptr UserPtr = reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize();
+    // The following condition isn't necessarily "UNLIKELY".
+    if (!isAligned(UserPtr, Alignment)) {
+      const uptr AlignedUserPtr = roundUpTo(UserPtr, Alignment);
+      const uptr Offset = AlignedUserPtr - UserPtr;
+      Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask;
+      DCHECK_GT(Offset, 2 * sizeof(u32));
+      // The BlockMarker has no security purpose, but is specifically meant for
+      // the chunk iteration function that can be used in debugging situations.
+      // It is the only situation where we have to locate the start of a chunk
+      // based on its block address.
+      reinterpret_cast<u32 *>(Block)[0] = BlockMarker;
+      reinterpret_cast<u32 *>(Block)[1] = static_cast<u32>(Offset);
+      UserPtr = AlignedUserPtr;
+    }
+    Header.State = Chunk::State::Allocated;
+    Header.Origin = Origin & Chunk::OriginMask;
+    if (ClassId) {
+      Header.ClassId = ClassId & Chunk::ClassIdMask;
+      Header.SizeOrUnusedBytes = Size & Chunk::SizeOrUnusedBytesMask;
+    } else {
+      Header.SizeOrUnusedBytes =
+          (BlockEnd - (UserPtr + Size)) & Chunk::SizeOrUnusedBytesMask;
+    }
+    void *Ptr = reinterpret_cast<void *>(UserPtr);
+    Chunk::storeHeader(Cookie, Ptr, &Header);
+
+    if (&__scudo_allocate_hook)
+      __scudo_allocate_hook(Ptr, Size);
+
+    return Ptr;
+  }
+
+  NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0,
+                           UNUSED uptr Alignment = MinAlignment) {
+    // For a deallocation, we only ensure minimal initialization, meaning thread
+    // local data will be left uninitialized for now (when using ELF TLS). The
+    // fallback cache will be used instead. This is a workaround for a situation
+    // where the only heap operation performed in a thread would be a free past
+    // the TLS destructors, ending up in initialized thread specific data never
+    // being destroyed properly. Any other heap operation will do a full init.
+    initThreadMaybe(/*MinimalInit=*/true);
+
+    if (&__scudo_deallocate_hook)
+      __scudo_deallocate_hook(Ptr);
+
+    if (UNLIKELY(!Ptr))
+      return;
+    if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment)))
+      reportMisalignedPointer(AllocatorAction::Deallocating, Ptr);
+
+    Chunk::UnpackedHeader Header;
+    Chunk::loadHeader(Cookie, Ptr, &Header);
+
+    if (UNLIKELY(Header.State != Chunk::State::Allocated))
+      reportInvalidChunkState(AllocatorAction::Deallocating, Ptr);
+    if (Options.DeallocTypeMismatch) {
+      if (Header.Origin != Origin) {
+        // With the exception of memalign'd chunks, that can still be free'd.
+        if (UNLIKELY(Header.Origin != Chunk::Origin::Memalign ||
+                     Origin != Chunk::Origin::Malloc))
+          reportDeallocTypeMismatch(AllocatorAction::Deallocating, Ptr,
+                                    Header.Origin, Origin);
+      }
+    }
+
+    const uptr Size = getSize(Ptr, &Header);
+    if (DeleteSize && Options.DeleteSizeMismatch) {
+      if (UNLIKELY(DeleteSize != Size))
+        reportDeleteSizeMismatch(Ptr, DeleteSize, Size);
+    }
+
+    quarantineOrDeallocateChunk(Ptr, &Header, Size);
+  }
+
+  void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) {
+    initThreadMaybe();
+
+    // The following cases are handled by the C wrappers.
+    DCHECK_NE(OldPtr, nullptr);
+    DCHECK_NE(NewSize, 0);
+
+    if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(OldPtr), MinAlignment)))
+      reportMisalignedPointer(AllocatorAction::Reallocating, OldPtr);
+
+    Chunk::UnpackedHeader OldHeader;
+    Chunk::loadHeader(Cookie, OldPtr, &OldHeader);
+
+    if (UNLIKELY(OldHeader.State != Chunk::State::Allocated))
+      reportInvalidChunkState(AllocatorAction::Reallocating, OldPtr);
+
+    // Pointer has to be allocated with a malloc-type function. Some
+    // applications think that it is OK to realloc a memalign'ed pointer, which
+    // will trigger this check. It really isn't.
+    if (Options.DeallocTypeMismatch) {
+      if (UNLIKELY(OldHeader.Origin != Chunk::Origin::Malloc))
+        reportDeallocTypeMismatch(AllocatorAction::Reallocating, OldPtr,
+                                  OldHeader.Origin, Chunk::Origin::Malloc);
+    }
+
+    const uptr OldSize = getSize(OldPtr, &OldHeader);
+    // If the new size is identical to the old one, or lower but within an
+    // acceptable range, we just keep the old chunk, and update its header.
+    if (NewSize == OldSize)
+      return OldPtr;
+    if (NewSize < OldSize) {
+      const uptr Delta = OldSize - NewSize;
+      if (Delta < (SizeClassMap::MaxSize / 2)) {
+        Chunk::UnpackedHeader NewHeader = OldHeader;
+        NewHeader.SizeOrUnusedBytes =
+            (OldHeader.ClassId ? NewHeader.SizeOrUnusedBytes - Delta
+                               : NewHeader.SizeOrUnusedBytes + Delta) &
+            Chunk::SizeOrUnusedBytesMask;
+        Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader);
+        return OldPtr;
+      }
+    }
+
+    // Otherwise we allocate a new one, and deallocate the old one. Some
+    // allocators will allocate an even larger chunk (by a fixed factor) to
+    // allow for potential further in-place realloc. The gains of such a trick
+    // are currently unclear.
+    void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment);
+    if (NewPtr) {
+      memcpy(NewPtr, OldPtr, Min(NewSize, OldSize));
+      quarantineOrDeallocateChunk(OldPtr, &OldHeader, OldSize);
+    }
+    return NewPtr;
+  }
+
+  // TODO(kostyak): while this locks the Primary & Secondary, it still allows
+  //                pointers to be fetched from the TSD. We ultimately want to
+  //                lock the registry as well. For now, it's good enough.
+  void disable() {
+    initThreadMaybe();
+    Primary.disable();
+    Secondary.disable();
+  }
+
+  void enable() {
+    initThreadMaybe();
+    Secondary.enable();
+    Primary.enable();
+  }
+
+  void printStats() {
+    disable();
+    Primary.printStats();
+    Secondary.printStats();
+    Quarantine.printStats();
+    enable();
+  }
+
+  void releaseToOS() { Primary.releaseToOS(); }
+
+  // Iterate over all chunks and call a callback for all busy chunks located
+  // within the provided memory range. Said callback must not use this allocator
+  // or a deadlock can ensue. This fits Android's malloc_iterate() needs.
+  void iterateOverChunks(uptr Base, uptr Size, iterate_callback Callback,
+                         void *Arg) {
+    initThreadMaybe();
+    const uptr From = Base;
+    const uptr To = Base + Size;
+    auto Lambda = [this, From, To, Callback, Arg](uptr Block) {
+      if (Block < From || Block > To)
+        return;
+      uptr ChunkSize;
+      const uptr ChunkBase = getChunkFromBlock(Block, &ChunkSize);
+      if (ChunkBase != InvalidChunk)
+        Callback(ChunkBase, ChunkSize, Arg);
+    };
+    Primary.iterateOverBlocks(Lambda);
+    Secondary.iterateOverBlocks(Lambda);
+  }
+
+  bool canReturnNull() {
+    initThreadMaybe();
+    return Options.MayReturnNull;
+  }
+
+  // TODO(kostyak): implement this as a "backend" to mallopt.
+  bool setOption(UNUSED uptr Option, UNUSED uptr Value) { return false; }
+
+  // Return the usable size for a given chunk. Technically we lie, as we just
+  // report the actual size of a chunk. This is done to counteract code actively
+  // writing past the end of a chunk (like sqlite3) when the usable size allows
+  // for it, which then forces realloc to copy the usable size of a chunk as
+  // opposed to its actual size.
+  uptr getUsableSize(const void *Ptr) {
+    initThreadMaybe();
+    if (UNLIKELY(!Ptr))
+      return 0;
+    Chunk::UnpackedHeader Header;
+    Chunk::loadHeader(Cookie, Ptr, &Header);
+    // Getting the usable size of a chunk only makes sense if it's allocated.
+    if (UNLIKELY(Header.State != Chunk::State::Allocated))
+      reportInvalidChunkState(AllocatorAction::Sizing, const_cast<void *>(Ptr));
+    return getSize(Ptr, &Header);
+  }
+
+  void getStats(StatCounters S) {
+    initThreadMaybe();
+    Stats.get(S);
+  }
+
+private:
+  typedef MapAllocator SecondaryT;
+  typedef typename PrimaryT::SizeClassMap SizeClassMap;
+
+  static const uptr MinAlignmentLog = SCUDO_MIN_ALIGNMENT_LOG;
+  static const uptr MaxAlignmentLog = 24U; // 16 MB seems reasonable.
+  static const uptr MinAlignment = 1UL << MinAlignmentLog;
+  static const uptr MaxAlignment = 1UL << MaxAlignmentLog;
+  static const uptr MaxAllowedMallocSize =
+      FIRST_32_SECOND_64(1UL << 31, 1ULL << 40);
+
+  // Constants used by the chunk iteration mechanism.
+  static const u32 BlockMarker = 0x44554353U;
+  static const uptr InvalidChunk = ~static_cast<uptr>(0);
+
+  GlobalStats Stats;
+  TSDRegistryT TSDRegistry;
+  PrimaryT Primary;
+  SecondaryT Secondary;
+  QuarantineT Quarantine;
+
+  u32 Cookie;
+
+  struct {
+    u8 MayReturnNull : 1;       // may_return_null
+    u8 ZeroContents : 1;        // zero_contents
+    u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch
+    u8 DeleteSizeMismatch : 1;  // delete_size_mismatch
+    u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size
+  } Options;
+
+  // The following might get optimized out by the compiler.
+  NOINLINE void performSanityChecks() {
+    // Verify that the header offset field can hold the maximum offset. In the
+    // case of the Secondary allocator, it takes care of alignment and the
+    // offset will always be small. In the case of the Primary, the worst case
+    // scenario happens in the last size class, when the backend allocation
+    // would already be aligned on the requested alignment, which would happen
+    // to be the maximum alignment that would fit in that size class. As a
+    // result, the maximum offset will be at most the maximum alignment for the
+    // last size class minus the header size, in multiples of MinAlignment.
+    Chunk::UnpackedHeader Header = {};
+    const uptr MaxPrimaryAlignment = 1UL << getMostSignificantSetBitIndex(
+                                         SizeClassMap::MaxSize - MinAlignment);
+    const uptr MaxOffset =
+        (MaxPrimaryAlignment - Chunk::getHeaderSize()) >> MinAlignmentLog;
+    Header.Offset = MaxOffset & Chunk::OffsetMask;
+    if (UNLIKELY(Header.Offset != MaxOffset))
+      reportSanityCheckError("offset");
+
+    // Verify that we can fit the maximum size or amount of unused bytes in the
+    // header. Given that the Secondary fits the allocation to a page, the worst
+    // case scenario happens in the Primary. It will depend on the second to
+    // last and last class sizes, as well as the dynamic base for the Primary.
+    // The following is an over-approximation that works for our needs.
+    const uptr MaxSizeOrUnusedBytes = SizeClassMap::MaxSize - 1;
+    Header.SizeOrUnusedBytes =
+        MaxSizeOrUnusedBytes & Chunk::SizeOrUnusedBytesMask;
+    if (UNLIKELY(Header.SizeOrUnusedBytes != MaxSizeOrUnusedBytes))
+      reportSanityCheckError("size (or unused bytes)");
+
+    const uptr LargestClassId = SizeClassMap::LargestClassId;
+    Header.ClassId = LargestClassId;
+    if (UNLIKELY(Header.ClassId != LargestClassId))
+      reportSanityCheckError("class ID");
+  }
+
+  // Return the size of a chunk as requested during its allocation.
+  INLINE uptr getSize(const void *Ptr, Chunk::UnpackedHeader *Header) {
+    const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes;
+    if (Header->ClassId)
+      return SizeOrUnusedBytes;
+    return SecondaryT::getBlockEnd(Chunk::getBlockBegin(Ptr, Header)) -
+           reinterpret_cast<uptr>(Ptr) - SizeOrUnusedBytes;
+  }
+
+  ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
+    TSDRegistry.initThreadMaybe(this, MinimalInit);
+  }
+
+  void quarantineOrDeallocateChunk(void *Ptr, Chunk::UnpackedHeader *Header,
+                                   uptr Size) {
+    Chunk::UnpackedHeader NewHeader = *Header;
+    // If the quarantine is disabled, or if the size of the chunk is 0 or larger
+    // than the maximum allowed, we return the chunk directly to the backend.
+    const bool BypassQuarantine = !Quarantine.getCacheSize() || !Size ||
+                                  (Size > Options.QuarantineMaxChunkSize);
+    if (BypassQuarantine) {
+      NewHeader.State = Chunk::State::Available;
+      Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header);
+      void *BlockBegin = Chunk::getBlockBegin(Ptr, Header);
+      const uptr ClassId = NewHeader.ClassId;
+      if (ClassId) {
+        bool UnlockRequired;
+        auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
+        TSD->Cache.deallocate(ClassId, BlockBegin);
+        if (UnlockRequired)
+          TSD->unlock();
+      } else {
+        Secondary.deallocate(BlockBegin);
+      }
+    } else {
+      NewHeader.State = Chunk::State::Quarantined;
+      Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header);
+      bool UnlockRequired;
+      auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
+      Quarantine.put(&TSD->QuarantineCache,
+                     QuarantineCallback(*this, TSD->Cache), Ptr, Size);
+      if (UnlockRequired)
+        TSD->unlock();
+    }
+  }
+
+  // This only cares about valid busy chunks. This might change in the future.
+  uptr getChunkFromBlock(uptr Block, uptr *Size) {
+    u32 Offset = 0;
+    if (reinterpret_cast<u32 *>(Block)[0] == BlockMarker)
+      Offset = reinterpret_cast<u32 *>(Block)[1];
+    const uptr P = Block + Offset + Chunk::getHeaderSize();
+    const void *Ptr = reinterpret_cast<const void *>(P);
+    Chunk::UnpackedHeader Header;
+    if (!Chunk::isValid(Cookie, Ptr, &Header) ||
+        Header.State != Chunk::State::Allocated)
+      return InvalidChunk;
+    if (Size)
+      *Size = getSize(Ptr, &Header);
+    return P;
+  }
+};
+
+} // namespace scudo
+
+#endif // SCUDO_COMBINED_H_
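Before the unit test, this is the shape of a basic round trip through the Allocator class above. A minimal sketch only, mirroring the new/reset/allocate/deallocate pattern that combined_test.cc uses below, with the DefaultConfig from allocator_config.h and no error handling:

```cpp
// Usage sketch, not part of this change; assumes the public methods of
// scudo::Allocator<> defined above and the DefaultConfig from
// allocator_config.h.
#include "allocator_config.h"

int main() {
  auto *Instance = new scudo::Allocator<scudo::DefaultConfig>();
  Instance->reset(); // The tests zero the object this way before first use.
  void *P = Instance->allocate(128U, scudo::Chunk::Origin::Malloc);
  if (P) {
    // getUsableSize() reports the requested size, per the comment above.
    if (Instance->getUsableSize(P) >= 128U)
      Instance->deallocate(P, scudo::Chunk::Origin::Malloc);
  }
  Instance->releaseToOS();
  Instance->unmapTestOnly();
  delete Instance;
  return 0;
}
```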
Index: compiler-rt/trunk/lib/scudo/standalone/tests/CMakeLists.txt
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/tests/CMakeLists.txt
+++ compiler-rt/trunk/lib/scudo/standalone/tests/CMakeLists.txt
@@ -53,6 +53,7 @@
   bytemap_test.cc
   checksum_test.cc
   chunk_test.cc
+  combined_test.cc
   flags_test.cc
   list_test.cc
   map_test.cc
Index: compiler-rt/trunk/lib/scudo/standalone/tests/combined_test.cc
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/tests/combined_test.cc
+++ compiler-rt/trunk/lib/scudo/standalone/tests/combined_test.cc
@@ -0,0 +1,237 @@
+//===-- combined_test.cc ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "allocator_config.h"
+#include "combined.h"
+
+#include "gtest/gtest.h"
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+
+static std::mutex Mutex;
+static std::condition_variable Cv;
+static bool Ready = false;
+
+static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc;
+
+// This allows us to turn on the Quarantine for specific tests. The Quarantine
+// parameters are on the low end, to avoid having to loop excessively in some
+// tests.
+static bool UseQuarantine = false;
+extern "C" const char *__scudo_default_options() {
+  if (!UseQuarantine)
+    return "";
+  return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:"
+         "quarantine_max_chunk_size=1024";
+}
+
+template <class Config> static void testAllocator() {
+  using AllocatorT = scudo::Allocator<Config>;
+  auto Deleter = [](AllocatorT *A) {
+    A->unmapTestOnly();
+    delete A;
+  };
+  std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
+                                                           Deleter);
+  Allocator->reset();
+
+  constexpr scudo::uptr MinAlignLog = FIRST_32_SECOND_64(3U, 4U);
+
+  // This allocates and deallocates a bunch of chunks, with a wide range of
+  // sizes and alignments, with a focus on sizes that could trigger weird
+  // behaviors (plus or minus a small delta of a power of two for example).
+  for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) {
+    for (scudo::uptr AlignLog = MinAlignLog; AlignLog <= 16U; AlignLog++) {
+      const scudo::uptr Align = 1U << AlignLog;
+      for (scudo::sptr Delta = -32; Delta <= 32; Delta++) {
+        if (static_cast<scudo::sptr>(1U << SizeLog) + Delta <= 0)
+          continue;
+        const scudo::uptr Size = (1U << SizeLog) + Delta;
+        void *P = Allocator->allocate(Size, Origin, Align);
+        EXPECT_NE(P, nullptr);
+        EXPECT_TRUE(scudo::isAligned(reinterpret_cast<scudo::uptr>(P), Align));
+        EXPECT_LE(Size, Allocator->getUsableSize(P));
+        memset(P, 0xaa, Size);
+        Allocator->deallocate(P, Origin, Size);
+      }
+    }
+  }
+  Allocator->releaseToOS();
+
+  // Verify that a chunk will end up being reused, at some point.
+  const scudo::uptr NeedleSize = 1024U;
+  void *NeedleP = Allocator->allocate(NeedleSize, Origin);
+  Allocator->deallocate(NeedleP, Origin);
+  bool Found = false;
+  for (scudo::uptr I = 0; I < 1024U && !Found; I++) {
+    void *P = Allocator->allocate(NeedleSize, Origin);
+    if (P == NeedleP)
+      Found = true;
+    Allocator->deallocate(P, Origin);
+  }
+  EXPECT_TRUE(Found);
+
+  constexpr scudo::uptr MaxSize = Config::Primary::SizeClassMap::MaxSize;
+
+  // Reallocate a large chunk all the way down to a byte, verifying that we
+  // preserve the data in the process.
+  scudo::uptr Size = MaxSize * 2;
+  const scudo::uptr DataSize = 2048U;
+  void *P = Allocator->allocate(Size, Origin);
+  const char Marker = 0xab;
+  memset(P, Marker, scudo::Min(Size, DataSize));
+  while (Size > 1U) {
+    Size /= 2U;
+    void *NewP = Allocator->reallocate(P, Size);
+    EXPECT_NE(NewP, nullptr);
+    for (scudo::uptr J = 0; J < scudo::Min(Size, DataSize); J++)
+      EXPECT_EQ((reinterpret_cast<char *>(NewP))[J], Marker);
+    P = NewP;
+  }
+  Allocator->deallocate(P, Origin);
+
+  // Allocate a bunch of chunks, then iterate over all of them, ensuring they
+  // are the ones we allocated. This requires the allocator to not have any
+  // other allocated chunk at this point (e.g. it won't work with the
+  // Quarantine).
+  if (!UseQuarantine) {
+    std::vector<void *> V;
+    for (scudo::uptr I = 0; I < 64U; I++)
+      V.push_back(Allocator->allocate(rand() % (MaxSize / 2U), Origin));
+    Allocator->disable();
+    Allocator->iterateOverChunks(
+        0U, static_cast<scudo::uptr>(SCUDO_MMAP_RANGE_SIZE - 1),
+        [](uintptr_t Base, size_t Size, void *Arg) {
+          std::vector<void *> *V = reinterpret_cast<std::vector<void *> *>(Arg);
+          void *P = reinterpret_cast<void *>(Base);
+          EXPECT_NE(std::find(V->begin(), V->end(), P), V->end());
+        },
+        reinterpret_cast<void *>(&V));
+    Allocator->enable();
+    while (!V.empty()) {
+      Allocator->deallocate(V.back(), Origin);
+      V.pop_back();
+    }
+  }
+
+  Allocator->releaseToOS();
+  Allocator->printStats();
+}
+
+TEST(ScudoCombinedTest, BasicCombined) {
+  testAllocator<scudo::DefaultConfig>();
+#if SCUDO_WORDSIZE == 64U
+  testAllocator<scudo::FuchsiaConfig>();
+#endif
+  // The following configs should work on all platforms.
+  UseQuarantine = true;
+  testAllocator<scudo::AndroidConfig>();
+  UseQuarantine = false;
+  testAllocator<scudo::AndroidSvelteConfig>();
+}
+
+template <typename AllocatorT> static void stressAllocator(AllocatorT *A) {
+  {
+    std::unique_lock<std::mutex> Lock(Mutex);
+    while (!Ready)
+      Cv.wait(Lock);
+  }
+  std::vector<std::pair<void *, scudo::uptr>> V;
+  for (scudo::uptr I = 0; I < 256U; I++) {
+    const scudo::uptr Size = std::rand() % 4096U;
+    void *P = A->allocate(Size, Origin);
+    // A region could have run out of memory, resulting in a null P.
+    if (P)
+      V.push_back(std::make_pair(P, Size));
+  }
+  while (!V.empty()) {
+    auto Pair = V.back();
+    A->deallocate(Pair.first, Origin, Pair.second);
+    V.pop_back();
+  }
+}
+
+template <class Config> static void testAllocatorThreaded() {
+  using AllocatorT = scudo::Allocator<Config>;
+  auto Deleter = [](AllocatorT *A) {
+    A->unmapTestOnly();
+    delete A;
+  };
+  std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
+                                                           Deleter);
+  Allocator->reset();
+  std::thread Threads[32];
+  for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
+    Threads[I] = std::thread(stressAllocator<AllocatorT>, Allocator.get());
+  {
+    std::unique_lock<std::mutex> Lock(Mutex);
+    Ready = true;
+    Cv.notify_all();
+  }
+  for (auto &T : Threads)
+    T.join();
+  Allocator->releaseToOS();
+}
+
+TEST(ScudoCombinedTest, ThreadedCombined) {
+  testAllocatorThreaded<scudo::DefaultConfig>();
+#if SCUDO_WORDSIZE == 64U
+  testAllocatorThreaded<scudo::FuchsiaConfig>();
+#endif
+  UseQuarantine = true;
+  testAllocatorThreaded<scudo::AndroidConfig>();
+  UseQuarantine = false;
+  testAllocatorThreaded<scudo::AndroidSvelteConfig>();
+}
+
+struct DeathConfig {
+  // Tiny allocator, its Primary only serves chunks of 1024 bytes.
+  using DeathSizeClassMap = scudo::SizeClassMap<1U, 10U, 10U, 10U, 1U, 10U>;
+  typedef scudo::SizeClassAllocator32<DeathSizeClassMap, 18U> Primary;
+  template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U>;
+};
+
+TEST(ScudoCombinedTest, DeathCombined) {
+  using AllocatorT = scudo::Allocator<DeathConfig>;
+  auto Deleter = [](AllocatorT *A) {
+    A->unmapTestOnly();
+    delete A;
+  };
+  std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
+                                                           Deleter);
+  Allocator->reset();
+
+  const scudo::uptr Size = 1000U;
+  void *P = Allocator->allocate(Size, Origin);
+  EXPECT_NE(P, nullptr);
+
+  // Invalid sized deallocation.
+  EXPECT_DEATH(Allocator->deallocate(P, Origin, Size + 8U), "");
+
+  // Misaligned pointer.
+  void *MisalignedP =
+      reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(P) | 1U);
+  EXPECT_DEATH(Allocator->deallocate(MisalignedP, Origin, Size), "");
+  EXPECT_DEATH(Allocator->reallocate(MisalignedP, Size * 2U), "");
+
+  // Header corruption.
+  scudo::u64 *H =
+      reinterpret_cast<scudo::u64 *>(scudo::Chunk::getAtomicHeader(P));
+  *H ^= 0x42U;
+  EXPECT_DEATH(Allocator->deallocate(P, Origin, Size), "");
+  *H ^= 0x420042U;
+  EXPECT_DEATH(Allocator->deallocate(P, Origin, Size), "");
+  *H ^= 0x420000U;
+
+  // Invalid chunk state.
+  Allocator->deallocate(P, Origin, Size);
+  EXPECT_DEATH(Allocator->deallocate(P, Origin, Size), "");
+  EXPECT_DEATH(Allocator->reallocate(P, Size * 2U), "");
+  EXPECT_DEATH(Allocator->getUsableSize(P), "");
+}
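For reference, the amount of memory actually requested from the Primary or Secondary for each of the allocations exercised above is the NeededSize expression from Allocator::allocate(). A standalone worked example of that arithmetic, with illustrative stand-in constants (the real values come from Chunk::getHeaderSize() and SCUDO_MIN_ALIGNMENT_LOG in this patch's headers):

```cpp
// Worked example of the NeededSize computation in Allocator::allocate().
// HeaderSize and MinAlignment below are illustrative stand-ins only.
#include <cstdint>
#include <cstdio>

static uint64_t roundUpTo(uint64_t X, uint64_t Boundary) {
  return (X + Boundary - 1) & ~(Boundary - 1);
}

int main() {
  const uint64_t HeaderSize = 16U, MinAlignment = 16U, Alignment = 64U;
  const uint64_t Size = 0U; // A zero-sized request still gets 1 usable byte.
  const uint64_t NeededSize =
      HeaderSize + roundUpTo(Size ? Size : 1U, MinAlignment) +
      ((Alignment > MinAlignment) ? (Alignment - HeaderSize) : 0U);
  // With these stand-in values: 16 + 16 + (64 - 16) = 80 bytes, enough to
  // guarantee a 64-byte aligned user address somewhere in the block.
  printf("NeededSize = %llu\n", (unsigned long long)NeededSize);
  return 0;
}
```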