Index: lib/scudo/CMakeLists.txt
===================================================================
--- lib/scudo/CMakeLists.txt
+++ lib/scudo/CMakeLists.txt
@@ -14,6 +14,8 @@
   scudo_interceptors.cpp
   scudo_new_delete.cpp
   scudo_termination.cpp
+  scudo_thread_android.cpp
+  scudo_thread_linux.cpp
   scudo_utils.cpp)
 
 # Enable the SSE 4.2 instruction set for scudo_crc32.cpp, if available.
Index: lib/scudo/scudo_allocator.h
===================================================================
--- lib/scudo/scudo_allocator.h
+++ lib/scudo/scudo_allocator.h
@@ -82,8 +82,57 @@
   void copyTo(Flags *f, CommonFlags *cf) const;
 };
 
+#if SANITIZER_CAN_USE_ALLOCATOR64
+const uptr AllocatorSpace = ~0ULL;
+# if defined(__aarch64__) && SANITIZER_ANDROID
+const uptr AllocatorSize = 0x4000000000ULL;  // 256G.
+typedef DefaultSizeClassMap SizeClassMap;
+# elif defined(__aarch64__)
+// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA,
+// so there is no need for different values for different VMAs.
+const uptr AllocatorSize = 0x10000000000ULL;  // 1T.
+typedef DefaultSizeClassMap SizeClassMap;
+# else
+const uptr AllocatorSize = 0x40000000000ULL;  // 4T.
+typedef DefaultSizeClassMap SizeClassMap;
+# endif
+struct AP {
+  static const uptr kSpaceBeg = AllocatorSpace;
+  static const uptr kSpaceSize = AllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef __scudo::SizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags =
+      SizeClassAllocator64FlagMasks::kRandomShuffleChunks;
+};
+typedef SizeClassAllocator64<AP> PrimaryAllocator;
+#else
+// Currently, the 32-bit Sanitizer allocator has not yet benefited from all the
+// security improvements brought to the 64-bit one. This makes the 32-bit
+// version of Scudo slightly less toughened.
+static const uptr RegionSizeLog = 20;
+static const uptr NumRegions = SANITIZER_MMAP_RANGE_SIZE >> RegionSizeLog;
+# if SANITIZER_WORDSIZE == 32
+typedef FlatByteMap<NumRegions> ByteMap;
+# elif SANITIZER_WORDSIZE == 64
+typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap;
+# endif  // SANITIZER_WORDSIZE
+typedef DefaultSizeClassMap SizeClassMap;
+typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 0, SizeClassMap,
+    RegionSizeLog, ByteMap> PrimaryAllocator;
+#endif  // SANITIZER_CAN_USE_ALLOCATOR64
+
+#include "scudo_allocator_secondary.h"
+
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+typedef ScudoLargeMmapAllocator SecondaryAllocator;
+typedef CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
+    ScudoBackendAllocator;
+
+ScudoBackendAllocator &getBackendAllocator();
+
+void initScudo();
 void initAllocator(const AllocatorOptions &options);
-void drainQuarantine();
 
 void *scudoMalloc(uptr Size, AllocType Type);
 void scudoFree(void *Ptr, AllocType Type);
@@ -97,8 +146,6 @@
 void *scudoAlignedAlloc(uptr Alignment, uptr Size);
 uptr scudoMallocUsableSize(void *Ptr);
 
-#include "scudo_allocator_secondary.h"
-
 }  // namespace __scudo
 
 #endif  // SCUDO_ALLOCATOR_H_
Index: lib/scudo/scudo_allocator.cpp
===================================================================
--- lib/scudo/scudo_allocator.cpp
+++ lib/scudo/scudo_allocator.cpp
@@ -15,56 +15,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "scudo_allocator.h"
+#include "scudo_thread.h"
 #include "scudo_utils.h"
 
 #include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_quarantine.h"
 
-#include
-#include
-
-#include
+#include
 
 namespace __scudo {
 
-#if SANITIZER_CAN_USE_ALLOCATOR64
-const uptr AllocatorSpace = ~0ULL;
-const uptr AllocatorSize = 0x40000000000ULL;
-typedef DefaultSizeClassMap SizeClassMap;
-struct AP {
-  static const uptr kSpaceBeg = AllocatorSpace;
-  static const uptr kSpaceSize = AllocatorSize;
-  static const uptr kMetadataSize = 0;
-  typedef __scudo::SizeClassMap SizeClassMap;
-  typedef NoOpMapUnmapCallback MapUnmapCallback;
-  static const uptr kFlags =
-      SizeClassAllocator64FlagMasks::kRandomShuffleChunks;
-};
-typedef SizeClassAllocator64<AP> PrimaryAllocator;
-#else
-// Currently, the 32-bit Sanitizer allocator has not yet benefited from all the
-// security improvements brought to the 64-bit one. This makes the 32-bit
-// version of Scudo slightly less toughened.
-static const uptr RegionSizeLog = 20;
-static const uptr NumRegions = SANITIZER_MMAP_RANGE_SIZE >> RegionSizeLog;
-# if SANITIZER_WORDSIZE == 32
-typedef FlatByteMap<NumRegions> ByteMap;
-# elif SANITIZER_WORDSIZE == 64
-typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap;
-# endif  // SANITIZER_WORDSIZE
-typedef DefaultSizeClassMap SizeClassMap;
-typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 0, SizeClassMap,
-    RegionSizeLog, ByteMap> PrimaryAllocator;
-#endif  // SANITIZER_CAN_USE_ALLOCATOR64
-
-typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
-typedef ScudoLargeMmapAllocator SecondaryAllocator;
-typedef CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
-    ScudoAllocator;
-
-static ScudoAllocator &getAllocator();
-
-static thread_local Xorshift128Plus Prng;
 // Global static cookie, initialized at start-up.
 static uptr Cookie;
 
@@ -101,9 +61,10 @@
   // Returns the usable size for a chunk, meaning the amount of bytes from the
   // beginning of the user data to the end of the backend allocated chunk.
uptr getUsableSize(UnpackedHeader *Header) { - uptr Size = getAllocator().GetActuallyAllocatedSize(getAllocBeg(Header)); + uptr Size = + getBackendAllocator().GetActuallyAllocatedSize(getAllocBeg(Header)); if (Size == 0) - return Size; + return 0; return Size - AlignedChunkHeaderSize - (Header->Offset << MinAlignmentLog); } @@ -120,7 +81,8 @@ return static_cast(Crc); } - // Checks the validity of a chunk by verifying its checksum. + // Checks the validity of a chunk by verifying its checksum. This check + // doesn't incur termination in case of an invalid chunk. bool isValid() { UnpackedHeader NewUnpackedHeader; const AtomicPackedHeader *AtomicHeader = @@ -130,13 +92,27 @@ return (NewUnpackedHeader.Checksum == computeChecksum(&NewUnpackedHeader)); } + // Nulls out a chunk header. When returning the chunk to the backend, there + // is no need to store a valid ChunkAvailable header, as this would be + // computationally expensive. Zero-ing out serves the same purpose by making + // the header invalid. In the extremely rare event where 0 would be a valid + // checksum for the chunk, the state of the chunk is ChunkAvailable anyway. + COMPILER_CHECK(ChunkAvailable == 0); + void eraseHeader() { + PackedHeader NullPackedHeader = 0; + AtomicPackedHeader *AtomicHeader = + reinterpret_cast(this); + atomic_store_relaxed(AtomicHeader, NullPackedHeader); + } + // Loads and unpacks the header, verifying the checksum in the process. void loadHeader(UnpackedHeader *NewUnpackedHeader) const { const AtomicPackedHeader *AtomicHeader = reinterpret_cast(this); PackedHeader NewPackedHeader = atomic_load_relaxed(AtomicHeader); *NewUnpackedHeader = bit_cast(NewPackedHeader); - if (NewUnpackedHeader->Checksum != computeChecksum(NewUnpackedHeader)) { + if (UNLIKELY(NewUnpackedHeader->Checksum + != computeChecksum(NewUnpackedHeader))) { dieWithMessage("ERROR: corrupted chunk header at address %p\n", this); } } @@ -160,10 +136,10 @@ PackedHeader OldPackedHeader = bit_cast(*OldUnpackedHeader); AtomicPackedHeader *AtomicHeader = reinterpret_cast(this); - if (!atomic_compare_exchange_strong(AtomicHeader, - &OldPackedHeader, - NewPackedHeader, - memory_order_relaxed)) { + if (UNLIKELY(!atomic_compare_exchange_strong(AtomicHeader, + &OldPackedHeader, + NewPackedHeader, + memory_order_relaxed))) { dieWithMessage("ERROR: race on chunk header at address %p\n", this); } } @@ -171,30 +147,7 @@ static bool ScudoInitIsRunning = false; -static pthread_once_t GlobalInited = PTHREAD_ONCE_INIT; -static pthread_key_t PThreadKey; - -static thread_local bool ThreadInited = false; -static thread_local bool ThreadTornDown = false; -static thread_local AllocatorCache Cache; - -static void teardownThread(void *p) { - uptr v = reinterpret_cast(p); - // The glibc POSIX thread-local-storage deallocation routine calls user - // provided destructors in a loop of PTHREAD_DESTRUCTOR_ITERATIONS. - // We want to be called last since other destructors might call free and the - // like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the - // quarantine and swallowing the cache. 
- if (v < PTHREAD_DESTRUCTOR_ITERATIONS) { - pthread_setspecific(PThreadKey, reinterpret_cast(v + 1)); - return; - } - drainQuarantine(); - getAllocator().DestroyCache(&Cache); - ThreadTornDown = true; -} - -static void initInternal() { +void initScudo() { SanitizerToolName = "Scudo"; CHECK(!ScudoInitIsRunning && "Scudo init calls itself!"); ScudoInitIsRunning = true; @@ -205,28 +158,13 @@ } initFlags(); - AllocatorOptions Options; Options.setFrom(getFlags(), common_flags()); initAllocator(Options); - MaybeStartBackgroudThread(); - ScudoInitIsRunning = false; } -static void initGlobal() { - pthread_key_create(&PThreadKey, teardownThread); - initInternal(); -} - -static void NOINLINE initThread() { - pthread_once(&GlobalInited, initGlobal); - pthread_setspecific(PThreadKey, reinterpret_cast(1)); - getAllocator().InitCache(&Cache); - ThreadInited = true; -} - struct QuarantineCallback { explicit QuarantineCallback(AllocatorCache *Cache) : Cache_(Cache) {} @@ -235,38 +173,45 @@ void Recycle(ScudoChunk *Chunk) { UnpackedHeader Header; Chunk->loadHeader(&Header); - if (Header.State != ChunkQuarantine) { + if (UNLIKELY(Header.State != ChunkQuarantine)) { dieWithMessage("ERROR: invalid chunk state when recycling address %p\n", Chunk); } + Chunk->eraseHeader(); void *Ptr = Chunk->getAllocBeg(&Header); - getAllocator().Deallocate(Cache_, Ptr); + getBackendAllocator().Deallocate(Cache_, Ptr); } /// Internal quarantine allocation and deallocation functions. void *Allocate(uptr Size) { - // The internal quarantine memory cannot be protected by us. But the only - // structures allocated are QuarantineBatch, that are 8KB for x64. So we - // will use mmap for those, and given that Deallocate doesn't pass a size - // in, we enforce the size of the allocation to be sizeof(QuarantineBatch). - // TODO(kostyak): switching to mmap impacts greatly performances, we have - // to find another solution - // CHECK_EQ(Size, sizeof(QuarantineBatch)); - // return MmapOrDie(Size, "QuarantineBatch"); - return getAllocator().Allocate(Cache_, Size, 1, false); + // TODO(kostyak): figure out the best way to protect the batches. 
+ return getBackendAllocator().Allocate(Cache_, Size, MinAlignment); } void Deallocate(void *Ptr) { - // UnmapOrDie(Ptr, sizeof(QuarantineBatch)); - getAllocator().Deallocate(Cache_, Ptr); + getBackendAllocator().Deallocate(Cache_, Ptr); } AllocatorCache *Cache_; }; typedef Quarantine ScudoQuarantine; -typedef ScudoQuarantine::Cache QuarantineCache; -static thread_local QuarantineCache ThreadQuarantineCache; +typedef ScudoQuarantine::Cache ScudoQuarantineCache; +COMPILER_CHECK(sizeof(ScudoQuarantineCache) + <= sizeof(ScudoThreadContext::QuarantineCachePlaceHolder)); + +AllocatorCache *getAllocatorCache(ScudoThreadContext *ThreadContext) { + return &ThreadContext->Cache; +} + +ScudoQuarantineCache *getQuarantineCache(ScudoThreadContext *ThreadContext) { + return reinterpret_cast< + ScudoQuarantineCache *>(ThreadContext->QuarantineCachePlaceHolder); +} + +Xorshift128Plus *getPrng(ScudoThreadContext *ThreadContext) { + return &ThreadContext->Prng; +} void AllocatorOptions::setFrom(const Flags *f, const CommonFlags *cf) { MayReturnNull = cf->allocator_may_return_null; @@ -288,11 +233,11 @@ f->ZeroContents = ZeroContents; } -struct Allocator { +struct ScudoAllocator { static const uptr MaxAllowedMallocSize = FIRST_32_SECOND_64(2UL << 30, 1ULL << 40); - ScudoAllocator BackendAllocator; + ScudoBackendAllocator BackendAllocator; ScudoQuarantine AllocatorQuarantine; // The fallback caches are used when the thread local caches have been @@ -300,13 +245,14 @@ // be accessed by different threads. StaticSpinMutex FallbackMutex; AllocatorCache FallbackAllocatorCache; - QuarantineCache FallbackQuarantineCache; + ScudoQuarantineCache FallbackQuarantineCache; + Xorshift128Plus GlobalPrng; bool DeallocationTypeMismatch; bool ZeroContents; bool DeleteSizeMismatch; - explicit Allocator(LinkerInitialized) + explicit ScudoAllocator(LinkerInitialized) : AllocatorQuarantine(LINKER_INITIALIZED), FallbackQuarantineCache(LINKER_INITIALIZED) {} @@ -348,38 +294,38 @@ AllocatorQuarantine.Init( static_cast(Options.QuarantineSizeMb) << 20, static_cast(Options.ThreadLocalQuarantineSizeKb) << 10); + FallbackMutex.Init(); BackendAllocator.InitCache(&FallbackAllocatorCache); - Cookie = Prng.Next(); + GlobalPrng.initFromURandom(); + Cookie = GlobalPrng.getNext(); } - // Helper function that checks for a valid Scudo chunk. + // Helper function that checks for a valid Scudo chunk. nullptr isn't. bool isValidPointer(const void *UserPtr) { - if (UNLIKELY(!ThreadInited)) - initThread(); - uptr ChunkBeg = reinterpret_cast(UserPtr); - if (!IsAligned(ChunkBeg, MinAlignment)) { + initThreadMaybe(); + if (!UserPtr) return false; - } - ScudoChunk *Chunk = - reinterpret_cast(ChunkBeg - AlignedChunkHeaderSize); - return Chunk->isValid(); + uptr UserBeg = reinterpret_cast(UserPtr); + if (!IsAligned(UserBeg, MinAlignment)) + return false; + return getScudoChunk(UserBeg)->isValid(); } // Allocates a chunk. 
- void *allocate(uptr Size, uptr Alignment, AllocType Type) { - if (UNLIKELY(!ThreadInited)) - initThread(); - if (!IsPowerOfTwo(Alignment)) { + void *allocate(uptr Size, uptr Alignment, AllocType Type, + bool ForceZeroContents = false) { + initThreadMaybe(); + if (UNLIKELY(!IsPowerOfTwo(Alignment))) { dieWithMessage("ERROR: alignment is not a power of 2\n"); } if (Alignment > MaxAlignment) return BackendAllocator.ReturnNullOrDieOnBadRequest(); if (Alignment < MinAlignment) Alignment = MinAlignment; - if (Size == 0) - Size = 1; if (Size >= MaxAllowedMallocSize) return BackendAllocator.ReturnNullOrDieOnBadRequest(); + if (Size == 0) + Size = 1; uptr NeededSize = RoundUpTo(Size, MinAlignment) + AlignedChunkHeaderSize; if (Alignment > MinAlignment) @@ -394,14 +340,23 @@ // combined allocator to accommodate the situation. bool FromPrimary = PrimaryAllocator::CanAllocate(NeededSize, MinAlignment); + uptr Salt; void *Ptr; - if (LIKELY(!ThreadTornDown)) { - Ptr = BackendAllocator.Allocate(&Cache, NeededSize, - FromPrimary ? MinAlignment : Alignment); + uptr AllocationAlignment = FromPrimary ? MinAlignment : Alignment; + ScudoThreadContext *ThreadContext = getCurrentThreadContext(); + // Based on what backs the ThreadContext, the locking mechanism differ (it + // might not be locked at all for thread_local variables). + if (ThreadContext) { + ThreadContext->Lock(); + Salt = getPrng(ThreadContext)->getNext(); + Ptr = BackendAllocator.Allocate(getAllocatorCache(ThreadContext), + NeededSize, AllocationAlignment); + ThreadContext->Unlock(); } else { - SpinMutexLock l(&FallbackMutex); - Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, NeededSize, - FromPrimary ? MinAlignment : Alignment); + SpinMutexLock Lock(&FallbackMutex); + Salt = GlobalPrng.getNext(); + Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, + NeededSize, AllocationAlignment); } if (!Ptr) return BackendAllocator.ReturnNullOrDieOnOOM(); @@ -419,109 +374,131 @@ uptr ActuallyAllocatedSize = BackendAllocator.GetActuallyAllocatedSize( reinterpret_cast(AllocBeg)); // If requested, we will zero out the entire contents of the returned chunk. - if (ZeroContents && FromPrimary) + if ((ForceZeroContents || ZeroContents) && FromPrimary) memset(Ptr, 0, ActuallyAllocatedSize); - uptr ChunkBeg = AllocBeg + AlignedChunkHeaderSize; - if (!IsAligned(ChunkBeg, Alignment)) - ChunkBeg = RoundUpTo(ChunkBeg, Alignment); - CHECK_LE(ChunkBeg + Size, AllocBeg + NeededSize); - ScudoChunk *Chunk = - reinterpret_cast(ChunkBeg - AlignedChunkHeaderSize); + uptr UserBeg = AllocBeg + AlignedChunkHeaderSize; + if (!IsAligned(UserBeg, Alignment)) + UserBeg = RoundUpTo(UserBeg, Alignment); + CHECK_LE(UserBeg + Size, AllocBeg + NeededSize); UnpackedHeader Header = {}; Header.State = ChunkAllocated; - uptr Offset = ChunkBeg - AlignedChunkHeaderSize - AllocBeg; + uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg; Header.Offset = Offset >> MinAlignmentLog; Header.AllocType = Type; Header.UnusedBytes = ActuallyAllocatedSize - Offset - AlignedChunkHeaderSize - Size; - Header.Salt = static_cast(Prng.Next()); - Chunk->storeHeader(&Header); - void *UserPtr = reinterpret_cast(ChunkBeg); - // TODO(kostyak): hooks sound like a terrible idea security wise but might - // be needed for things to work properly? + Header.Salt = static_cast(Salt); + getScudoChunk(UserBeg)->storeHeader(&Header); + void *UserPtr = reinterpret_cast(UserBeg); // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size); return UserPtr; } + // Place a chunk in the quarantine. 
In the event of a zero-sized quarantine, + // we directly deallocate the chunk, otherwise the flow would lead to the + // chunk being checksummed twice, once before Put and once in Recycle, with + // no additional security value. + void quarantineOrDeallocateChunk(ScudoChunk *Chunk, UnpackedHeader *Header, + uptr Size) { + bool BypassQuarantine = (AllocatorQuarantine.GetCacheSize() == 0); + ScudoThreadContext *ThreadContext = getCurrentThreadContext(); + if (BypassQuarantine) { + Chunk->eraseHeader(); + void *Ptr = Chunk->getAllocBeg(Header); + if (ThreadContext) { + ThreadContext->Lock(); + getBackendAllocator().Deallocate(getAllocatorCache(ThreadContext), Ptr); + ThreadContext->Unlock(); + } else { + SpinMutexLock Lock(&FallbackMutex); + getBackendAllocator().Deallocate(&FallbackAllocatorCache, Ptr); + } + } else { + UnpackedHeader NewHeader = *Header; + NewHeader.State = ChunkQuarantine; + Chunk->compareExchangeHeader(&NewHeader, Header); + if (ThreadContext) { + ThreadContext->Lock(); + AllocatorQuarantine.Put(getQuarantineCache(ThreadContext), + QuarantineCallback( + getAllocatorCache(ThreadContext)), + Chunk, Size); + ThreadContext->Unlock(); + } else { + SpinMutexLock Lock(&FallbackMutex); + AllocatorQuarantine.Put(&FallbackQuarantineCache, + QuarantineCallback(&FallbackAllocatorCache), + Chunk, Size); + } + } + } + // Deallocates a Chunk, which means adding it to the delayed free list (or - // Quarantine). + // Quarantine) or returning it to the backend. void deallocate(void *UserPtr, uptr DeleteSize, AllocType Type) { - if (UNLIKELY(!ThreadInited)) - initThread(); - // TODO(kostyak): see hook comment above + initThreadMaybe(); // if (&__sanitizer_free_hook) __sanitizer_free_hook(UserPtr); if (!UserPtr) return; - uptr ChunkBeg = reinterpret_cast(UserPtr); - if (!IsAligned(ChunkBeg, MinAlignment)) { + uptr UserBeg = reinterpret_cast(UserPtr); + if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) { dieWithMessage("ERROR: attempted to deallocate a chunk not properly " "aligned at address %p\n", UserPtr); } - ScudoChunk *Chunk = - reinterpret_cast(ChunkBeg - AlignedChunkHeaderSize); + ScudoChunk *Chunk = getScudoChunk(UserBeg); UnpackedHeader OldHeader; Chunk->loadHeader(&OldHeader); - if (OldHeader.State != ChunkAllocated) { + if (UNLIKELY(OldHeader.State != ChunkAllocated)) { dieWithMessage("ERROR: invalid chunk state when deallocating address " "%p\n", UserPtr); } - uptr UsableSize = Chunk->getUsableSize(&OldHeader); - UnpackedHeader NewHeader = OldHeader; - NewHeader.State = ChunkQuarantine; - Chunk->compareExchangeHeader(&NewHeader, &OldHeader); if (DeallocationTypeMismatch) { // The deallocation type has to match the allocation one. - if (NewHeader.AllocType != Type) { + if (OldHeader.AllocType != Type) { // With the exception of memalign'd Chunks, that can be still be free'd. 
- if (NewHeader.AllocType != FromMemalign || Type != FromMalloc) { + if (OldHeader.AllocType != FromMemalign || Type != FromMalloc) { dieWithMessage("ERROR: allocation type mismatch on address %p\n", - Chunk); + UserPtr); } } } - uptr Size = UsableSize - OldHeader.UnusedBytes; + uptr UsableSize = Chunk->getUsableSize(&OldHeader); if (DeleteSizeMismatch) { + uptr Size = UsableSize - OldHeader.UnusedBytes; if (DeleteSize && DeleteSize != Size) { dieWithMessage("ERROR: invalid sized delete on chunk at address %p\n", - Chunk); + UserPtr); } } - - if (LIKELY(!ThreadTornDown)) { - AllocatorQuarantine.Put(&ThreadQuarantineCache, - QuarantineCallback(&Cache), Chunk, UsableSize); - } else { - SpinMutexLock l(&FallbackMutex); - AllocatorQuarantine.Put(&FallbackQuarantineCache, - QuarantineCallback(&FallbackAllocatorCache), - Chunk, UsableSize); - } + quarantineOrDeallocateChunk(Chunk, &OldHeader, UsableSize); } // Reallocates a chunk. We can save on a new allocation if the new requested // size still fits in the chunk. void *reallocate(void *OldPtr, uptr NewSize) { - if (UNLIKELY(!ThreadInited)) - initThread(); - uptr ChunkBeg = reinterpret_cast(OldPtr); - ScudoChunk *Chunk = - reinterpret_cast(ChunkBeg - AlignedChunkHeaderSize); + initThreadMaybe(); + uptr UserBeg = reinterpret_cast(OldPtr); + if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) { + dieWithMessage("ERROR: attempted to reallocate a chunk not properly " + "aligned at address %p\n", OldPtr); + } + ScudoChunk *Chunk = getScudoChunk(UserBeg); UnpackedHeader OldHeader; Chunk->loadHeader(&OldHeader); - if (OldHeader.State != ChunkAllocated) { + if (UNLIKELY(OldHeader.State != ChunkAllocated)) { dieWithMessage("ERROR: invalid chunk state when reallocating address " "%p\n", OldPtr); } - uptr Size = Chunk->getUsableSize(&OldHeader); - if (OldHeader.AllocType != FromMalloc) { + if (UNLIKELY(OldHeader.AllocType != FromMalloc)) { dieWithMessage("ERROR: invalid chunk type when reallocating address %p\n", - Chunk); + OldPtr); } - UnpackedHeader NewHeader = OldHeader; + uptr UsableSize = Chunk->getUsableSize(&OldHeader); // The new size still fits in the current chunk. - if (NewSize <= Size) { - NewHeader.UnusedBytes = Size - NewSize; + if (NewSize <= UsableSize) { + UnpackedHeader NewHeader = OldHeader; + NewHeader.UnusedBytes = UsableSize - NewSize; Chunk->compareExchangeHeader(&NewHeader, &OldHeader); return OldPtr; } @@ -529,36 +506,28 @@ // old one. void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc); if (NewPtr) { - uptr OldSize = Size - OldHeader.UnusedBytes; + uptr OldSize = UsableSize - OldHeader.UnusedBytes; memcpy(NewPtr, OldPtr, Min(NewSize, OldSize)); - NewHeader.State = ChunkQuarantine; - Chunk->compareExchangeHeader(&NewHeader, &OldHeader); - if (LIKELY(!ThreadTornDown)) { - AllocatorQuarantine.Put(&ThreadQuarantineCache, - QuarantineCallback(&Cache), Chunk, Size); - } else { - SpinMutexLock l(&FallbackMutex); - AllocatorQuarantine.Put(&FallbackQuarantineCache, - QuarantineCallback(&FallbackAllocatorCache), - Chunk, Size); - } + quarantineOrDeallocateChunk(Chunk, &OldHeader, UsableSize); } return NewPtr; } + ScudoChunk *getScudoChunk(uptr UserBeg) { + return reinterpret_cast(UserBeg - AlignedChunkHeaderSize); + } + // Helper function that returns the actual usable size of a chunk. 
uptr getUsableSize(const void *Ptr) { - if (UNLIKELY(!ThreadInited)) - initThread(); + initThreadMaybe(); if (!Ptr) return 0; - uptr ChunkBeg = reinterpret_cast(Ptr); - ScudoChunk *Chunk = - reinterpret_cast(ChunkBeg - AlignedChunkHeaderSize); + uptr UserBeg = reinterpret_cast(Ptr); + ScudoChunk *Chunk = getScudoChunk(UserBeg); UnpackedHeader Header; Chunk->loadHeader(&Header); // Getting the usable size of a chunk only makes sense if it's allocated. - if (Header.State != ChunkAllocated) { + if (UNLIKELY(Header.State != ChunkAllocated)) { dieWithMessage("ERROR: invalid chunk state when sizing address %p\n", Ptr); } @@ -566,35 +535,31 @@ } void *calloc(uptr NMemB, uptr Size) { - if (UNLIKELY(!ThreadInited)) - initThread(); + initThreadMaybe(); uptr Total = NMemB * Size; - if (Size != 0 && Total / Size != NMemB) // Overflow check + if (Size != 0 && Total / Size != NMemB) // Overflow check return BackendAllocator.ReturnNullOrDieOnBadRequest(); - void *Ptr = allocate(Total, MinAlignment, FromMalloc); - // If ZeroContents, the content of the chunk has already been zero'd out. - if (!ZeroContents && Ptr && BackendAllocator.FromPrimary(Ptr)) - memset(Ptr, 0, getUsableSize(Ptr)); - return Ptr; + return allocate(Total, MinAlignment, FromMalloc, true); } - void drainQuarantine() { - AllocatorQuarantine.Drain(&ThreadQuarantineCache, - QuarantineCallback(&Cache)); + void commitBack(ScudoThreadContext *ThreadContext) { + AllocatorCache *Cache = getAllocatorCache(ThreadContext); + AllocatorQuarantine.Drain(getQuarantineCache(ThreadContext), + QuarantineCallback(Cache)); + BackendAllocator.DestroyCache(Cache); } uptr getStats(AllocatorStat StatType) { - if (UNLIKELY(!ThreadInited)) - initThread(); + initThreadMaybe(); uptr stats[AllocatorStatCount]; BackendAllocator.GetStats(stats); return stats[StatType]; } }; -static Allocator Instance(LINKER_INITIALIZED); +static ScudoAllocator Instance(LINKER_INITIALIZED); -static ScudoAllocator &getAllocator() { +ScudoBackendAllocator &getBackendAllocator() { return Instance.BackendAllocator; } @@ -602,8 +567,8 @@ Instance.init(Options); } -void drainQuarantine() { - Instance.drainQuarantine(); +void ScudoThreadContext::commitBack() { + Instance.commitBack(this); } void *scudoMalloc(uptr Size, AllocType Type) { Index: lib/scudo/scudo_allocator_secondary.h =================================================================== --- lib/scudo/scudo_allocator_secondary.h +++ lib/scudo/scudo_allocator_secondary.h @@ -88,8 +88,11 @@ // The primary adds the whole class size to the stats when allocating a // chunk, so we will do something similar here. But we will not account for // the guard pages. 
- Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize); - Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize); + { + SpinMutexLock l(&StatsMutex); + Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize); + Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize); + } return reinterpret_cast(UserBeg); } @@ -112,8 +115,11 @@ void Deallocate(AllocatorStats *Stats, void *Ptr) { SecondaryHeader *Header = getHeader(Ptr); - Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize); - Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize); + { + SpinMutexLock l(&StatsMutex); + Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize); + Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize); + } UnmapOrDie(reinterpret_cast(Header->MapBeg), Header->MapSize); } @@ -127,7 +133,7 @@ uptr GetActuallyAllocatedSize(void *Ptr) { SecondaryHeader *Header = getHeader(Ptr); - // Deduct PageSize as MapEnd includes the trailing guard page. + // Deduct PageSize as MapSize includes the trailing guard page. uptr MapEnd = Header->MapBeg + Header->MapSize - PageSize; return MapEnd - reinterpret_cast(Ptr); } @@ -182,6 +188,7 @@ const uptr SecondaryHeaderSize = sizeof(SecondaryHeader); const uptr HeadersSize = SecondaryHeaderSize + AlignedChunkHeaderSize; uptr PageSize; + SpinMutex StatsMutex; atomic_uint8_t MayReturnNull; }; Index: lib/scudo/scudo_thread.h =================================================================== --- /dev/null +++ lib/scudo/scudo_thread.h @@ -0,0 +1,46 @@ +//===-- scudo_thread.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// TODO(kostyak): description. +/// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_THREAD_H_ +#define SCUDO_THREAD_H_ + +#include "scudo_allocator.h" +#include "scudo_utils.h" + +#include "sanitizer_common/sanitizer_platform.h" + +namespace __scudo { + +struct ScudoThreadContext { + public: + StaticSpinMutex Mutex; + AllocatorCache Cache; + Xorshift128Plus Prng; + uptr QuarantineCachePlaceHolder[4]; + void init() { + Mutex.Init(); + getBackendAllocator().InitCache(&Cache); + Prng.initFromURandom(); + memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder)); + } + void commitBack(); + void Lock(); + void Unlock(); +}; + +void initThreadMaybe(); +ScudoThreadContext *getCurrentThreadContext(); + +} // namespace __scudo + +#endif // SCUDO_THREAD_H_ Index: lib/scudo/scudo_thread_android.cpp =================================================================== --- /dev/null +++ lib/scudo/scudo_thread_android.cpp @@ -0,0 +1,73 @@ +//===-- scudo_thread_android.cpp --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// TODO(kostyak): description. 
+/// +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" + +#if SANITIZER_LINUX && SANITIZER_ANDROID + +#include "scudo_thread.h" + +#include "sanitizer_common/sanitizer_atomic.h" + +#include + +namespace __scudo { + +static pthread_once_t GlobalInited = PTHREAD_ONCE_INIT; +static pthread_key_t PThreadKey; + +// For Android, and possibly other platforms with stronger memory constraints, +// we can't necessarily afford a context per thread. The alternative adopted +// here is a fixed set of contexts (the number being defined at compile time), +// where contexts are assigned to new threads in a round-robin fashion. + +#ifndef SCUDO_N_CONTEXTS +# define SCUDO_N_CONTEXTS 4 +#endif + +static atomic_uint32_t ThreadContextCurrentIndex; +static ScudoThreadContext *ThreadContexts; + +static void initOnce() { + CHECK_EQ(pthread_key_create(&PThreadKey, NULL), 0); + initScudo(); + ThreadContexts = reinterpret_cast( + MmapOrDie(sizeof(ScudoThreadContext) * SCUDO_N_CONTEXTS, __func__)); + for (int i = 0; i < SCUDO_N_CONTEXTS; i++) + ThreadContexts[i].init(); +} + +void initThreadMaybe() { + pthread_once(&GlobalInited, initOnce); + if (pthread_getspecific(PThreadKey) == NULL) { + u32 Index = atomic_fetch_add(&ThreadContextCurrentIndex, 1, + memory_order_relaxed); + ScudoThreadContext *ThreadContext = + &ThreadContexts[Index % SCUDO_N_CONTEXTS]; + pthread_setspecific(PThreadKey, ThreadContext); + } +} + +ScudoThreadContext *getCurrentThreadContext() { + ScudoThreadContext *ThreadContext = + reinterpret_cast(pthread_getspecific(PThreadKey)); + CHECK(ThreadContext); + return ThreadContext; +} + +void ScudoThreadContext::Lock() { Mutex.Lock(); } +void ScudoThreadContext::Unlock() { Mutex.Unlock(); } + +} // namespace __scudo + +#endif Index: lib/scudo/scudo_thread_linux.cpp =================================================================== --- /dev/null +++ lib/scudo/scudo_thread_linux.cpp @@ -0,0 +1,73 @@ +//===-- scudo_thread_linux.cpp ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// TODO(kostyak): description. +/// +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" + +#if SANITIZER_LINUX && !SANITIZER_ANDROID + +#include "scudo_thread.h" + +#include +#include + +namespace __scudo { + +static pthread_once_t GlobalInited = PTHREAD_ONCE_INIT; +static pthread_key_t PThreadKey; + +static thread_local bool ThreadInited = false; +static thread_local bool ThreadTornDown = false; +static thread_local ScudoThreadContext ThreadLocalContext; + +static void teardownThread(void *Ptr) { + uptr Iteration = reinterpret_cast(Ptr); + // The glibc POSIX thread-local-storage deallocation routine calls user + // provided destructors in a loop of PTHREAD_DESTRUCTOR_ITERATIONS. + // We want to be called last since other destructors might call free and the + // like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the + // quarantine and swallowing the cache. 
+ if (Iteration < PTHREAD_DESTRUCTOR_ITERATIONS) { + pthread_setspecific(PThreadKey, reinterpret_cast(Iteration + 1)); + return; + } + ThreadLocalContext.commitBack(); + ThreadTornDown = true; +} + + +static void initOnce() { + CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread), 0); + initScudo(); +} + +void initThreadMaybe() { + if (LIKELY(ThreadInited)) + return; + pthread_once(&GlobalInited, initOnce); + pthread_setspecific(PThreadKey, reinterpret_cast(1)); + ThreadLocalContext.init(); + ThreadInited = true; +} + +ScudoThreadContext *getCurrentThreadContext() { + if (ThreadTornDown) + return nullptr; + return &ThreadLocalContext; +} + +void ScudoThreadContext::Lock() {} +void ScudoThreadContext::Unlock() {} + +} // namespace __scudo + +#endif // SANITIZER_LINUX && !SANITIZER_ANDROID Index: lib/scudo/scudo_utils.h =================================================================== --- lib/scudo/scudo_utils.h +++ lib/scudo/scudo_utils.h @@ -14,10 +14,10 @@ #ifndef SCUDO_UTILS_H_ #define SCUDO_UTILS_H_ -#include - #include "sanitizer_common/sanitizer_common.h" +#include + namespace __scudo { template @@ -40,8 +40,8 @@ // The state (128 bits) will be stored in thread local storage. struct Xorshift128Plus { public: - Xorshift128Plus(); - u64 Next() { + void initFromURandom(); + u64 getNext() { u64 x = State[0]; const u64 y = State[1]; State[0] = y; Index: lib/scudo/scudo_utils.cpp =================================================================== --- lib/scudo/scudo_utils.cpp +++ lib/scudo/scudo_utils.cpp @@ -153,9 +153,8 @@ } } -// Default constructor for Xorshift128Plus seeds the state with /dev/urandom. // TODO(kostyak): investigate using getrandom() if available. -Xorshift128Plus::Xorshift128Plus() { +void Xorshift128Plus::initFromURandom() { fillRandom(reinterpret_cast(State), sizeof(State)); }
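Note on the shared contexts in scudo_thread_android.cpp: because SCUDO_N_CONTEXTS is
fixed at compile time, several threads can be assigned the same ScudoThreadContext,
which is why every access on Android goes through ScudoThreadContext::Lock()/Unlock().
A minimal standalone sketch of the same round-robin assignment (the Context struct and
function names below are illustrative, not part of the patch):

    #include <atomic>
    #include <cstdint>

    struct Context { /* per-thread allocator cache, PRNG, mutex... */ };

    static constexpr uint32_t NumContexts = 4;   // plays the role of SCUDO_N_CONTEXTS
    static Context Contexts[NumContexts];
    static std::atomic<uint32_t> CurrentIndex{0};

    // Each new thread takes the next slot; the counter only grows and the modulo
    // wraps it, so once all slots are handed out, threads start sharing contexts
    // and must synchronize on the per-context mutex.
    Context *assignContext() {
      uint32_t Index = CurrentIndex.fetch_add(1, std::memory_order_relaxed);
      return &Contexts[Index % NumContexts];
    }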
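Note on Xorshift128Plus: the scudo_utils.h hunk above only shows the beginning of
getNext(). For reference, a self-contained sketch of the standard xorshift128+ step the
class is named after (using one common published set of shift constants, 23/17/26; this
is an illustration, not a copy of the Scudo source):

    #include <cstdint>

    struct Xorshift128Plus {
      uint64_t State[2];  // must be seeded with real entropy, e.g. from /dev/urandom

      uint64_t getNext() {
        uint64_t x = State[0];
        const uint64_t y = State[1];
        State[0] = y;                              // shift the state window
        x ^= x << 23;                              // xorshift on the older word
        State[1] = x ^ y ^ (x >> 17) ^ (y >> 26);  // mix both words into the new state
        return State[1] + y;                       // output is the sum of the two words
      }
    };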
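Note on the Offset field used in allocate() and getAllocBeg(): the header stores the
distance between the backend allocation start and the (possibly realigned) user pointer,
in MinAlignment units. A small worked example as comments, assuming hypothetical values
MinAlignment = 16, MinAlignmentLog = 4 and AlignedChunkHeaderSize = 16:

    // Backend returns AllocBeg = 0x1000 for a 64-byte-aligned request.
    //   UserBeg = RoundUpTo(0x1000 + 16, 64)   -> 0x1040
    //   Offset  = (0x1040 - 16 - 0x1000) >> 4  -> 0x30 >> 4 = 3
    // Header.Offset = 3 is enough to recover the allocation start later:
    //   AllocBeg = UserBeg - AlignedChunkHeaderSize - (Offset << MinAlignmentLog)
    //            = 0x1040 - 16 - (3 << 4) = 0x1000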