diff --git a/compiler-rt/lib/scudo/standalone/chunk.h b/compiler-rt/lib/scudo/standalone/chunk.h
--- a/compiler-rt/lib/scudo/standalone/chunk.h
+++ b/compiler-rt/lib/scudo/standalone/chunk.h
@@ -65,7 +65,8 @@
 struct UnpackedHeader {
   uptr ClassId : 8;
   u8 State : 2;
-  u8 Origin : 2;
+  // Origin if State == Allocated, or WasZeroed otherwise.
+  u8 OriginOrWasZeroed : 2;
   uptr SizeOrUnusedBytes : 20;
   uptr Offset : 16;
   uptr Checksum : 16;
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -275,8 +275,10 @@
     }
 #endif // GWP_ASAN_HOOKS
 
-    const FillContentsMode FillContents =
-        ZeroContents ? ZeroFill : Options.FillContents;
+    const FillContentsMode FillContents = ZeroContents ? ZeroFill
+                                          : TSDRegistry.getDisableMemInit()
+                                              ? NoFill
+                                              : Options.FillContents;
 
     if (UNLIKELY(Alignment > MaxAlignment)) {
       if (Options.MayReturnNull)
@@ -405,7 +407,17 @@
           PrevEnd = NextPage;
         TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
         resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd);
-        if (Size) {
+        if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) {
+          // If an allocation needs to be zeroed (i.e. calloc) we can normally
+          // avoid zeroing the memory now since we can rely on memory having
+          // been zeroed on free, as this is normally done while setting the
+          // UAF tag. But if tagging was disabled per-thread when the memory
+          // was freed, it would not have been retagged and thus zeroed, and
+          // therefore it needs to be zeroed now.
+          memset(TaggedPtr, 0,
+                 Min(Size, roundUpTo(PrevEnd - TaggedUserPtr,
+                                     archMemoryTagGranuleSize())));
+        } else if (Size) {
           // Clear any stack metadata that may have previously been stored in
           // the chunk data.
           memset(TaggedPtr, 0, archMemoryTagGranuleSize());
@@ -438,7 +450,7 @@
     }
     Header.ClassId = ClassId & Chunk::ClassIdMask;
     Header.State = Chunk::State::Allocated;
-    Header.Origin = Origin & Chunk::OriginMask;
+    Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
     Header.SizeOrUnusedBytes =
         (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) &
         Chunk::SizeOrUnusedBytesMask;
@@ -483,12 +495,12 @@
     if (UNLIKELY(Header.State != Chunk::State::Allocated))
       reportInvalidChunkState(AllocatorAction::Deallocating, Ptr);
     if (Options.DeallocTypeMismatch) {
-      if (Header.Origin != Origin) {
+      if (Header.OriginOrWasZeroed != Origin) {
         // With the exception of memalign'd chunks, that can be still be free'd.
-        if (UNLIKELY(Header.Origin != Chunk::Origin::Memalign ||
+        if (UNLIKELY(Header.OriginOrWasZeroed != Chunk::Origin::Memalign ||
                      Origin != Chunk::Origin::Malloc))
           reportDeallocTypeMismatch(AllocatorAction::Deallocating, Ptr,
-                                    Header.Origin, Origin);
+                                    Header.OriginOrWasZeroed, Origin);
       }
     }
 
@@ -541,9 +553,10 @@
     // applications think that it is OK to realloc a memalign'ed pointer, which
     // will trigger this check. It really isn't.
     if (Options.DeallocTypeMismatch) {
-      if (UNLIKELY(OldHeader.Origin != Chunk::Origin::Malloc))
+      if (UNLIKELY(OldHeader.OriginOrWasZeroed != Chunk::Origin::Malloc))
         reportDeallocTypeMismatch(AllocatorAction::Reallocating, OldPtr,
-                                  OldHeader.Origin, Chunk::Origin::Malloc);
+                                  OldHeader.OriginOrWasZeroed,
+                                  Chunk::Origin::Malloc);
     }
 
     void *BlockBegin = getBlockBegin(OldPtr, &OldHeader);
@@ -1017,14 +1030,17 @@
     Chunk::UnpackedHeader NewHeader = *Header;
     if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) {
       u8 PrevTag = extractTag(loadTag(reinterpret_cast<uptr>(Ptr)));
-      uptr TaggedBegin, TaggedEnd;
-      const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe(
-          reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)),
-          SizeClassMap::getSizeByClassId(NewHeader.ClassId));
-      // Exclude the previous tag so that immediate use after free is detected
-      // 100% of the time.
-      setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin,
-                   &TaggedEnd);
+      if (!TSDRegistry.getDisableMemInit()) {
+        uptr TaggedBegin, TaggedEnd;
+        const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe(
+            reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)),
+            SizeClassMap::getSizeByClassId(NewHeader.ClassId));
+        // Exclude the previous tag so that immediate use after free is detected
+        // 100% of the time.
+        setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin,
+                     &TaggedEnd);
+      }
+      NewHeader.OriginOrWasZeroed = !TSDRegistry.getDisableMemInit();
       storeDeallocationStackMaybe(Ptr, PrevTag);
     }
     // If the quarantine is disabled, the actual size of a chunk is 0 or larger
diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h
--- a/compiler-rt/lib/scudo/standalone/common.h
+++ b/compiler-rt/lib/scudo/standalone/common.h
@@ -185,6 +185,8 @@
 enum class Option : u8 {
   ReleaseInterval,      // Release to OS interval in milliseconds.
   MemtagTuning,         // Whether to tune tagging for UAF or overflow.
+  ThreadDisableMemInit, // Whether to disable automatic heap initialization and,
+                        // where possible, memory tagging, on this thread.
   MaxCacheEntriesCount, // Maximum number of blocks that can be cached.
   MaxCacheEntrySize,    // Maximum size of a block that can be cached.
   MaxTSDsCount,         // Number of usable TSDs for the shared registry.
diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
--- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
+++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
@@ -121,6 +121,14 @@
 #define M_MEMTAG_TUNING -102
 #endif
 
+// Per-thread memory initialization tuning. The value argument should be one of:
+// 1: Disable automatic heap initialization and, where possible, memory tagging,
+//    on this thread.
+// 0: Normal behavior.
+#ifndef M_THREAD_DISABLE_MEM_INIT
+#define M_THREAD_DISABLE_MEM_INIT -103
+#endif
+
 #ifndef M_CACHE_COUNT_MAX
 #define M_CACHE_COUNT_MAX -200
 #endif
diff --git a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp
@@ -41,7 +41,7 @@
   initChecksum();
   const scudo::uptr Size = 0x100U;
   scudo::Chunk::UnpackedHeader OldHeader = {};
-  OldHeader.Origin = scudo::Chunk::Origin::Malloc;
+  OldHeader.OriginOrWasZeroed = scudo::Chunk::Origin::Malloc;
   OldHeader.ClassId = 0x42U;
   OldHeader.SizeOrUnusedBytes = Size;
   OldHeader.State = scudo::Chunk::State::Allocated;
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -512,3 +512,44 @@
     EXPECT_TRUE(Found);
   }
 }
+
+TEST(ScudoCombinedTest, DisableMemInit) {
+  using AllocatorT = TestAllocator<scudo::AndroidConfig>;
+  using SizeClassMap = AllocatorT::PrimaryT::SizeClassMap;
+  auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
+
+  std::vector<void *> Ptrs(65536, nullptr);
+
+  Allocator->setOption(scudo::Option::ThreadDisableMemInit, 1);
+
+  constexpr scudo::uptr MinAlignLog = FIRST_32_SECOND_64(3U, 4U);
+
+  // Test that if mem-init is disabled on a thread, calloc should still work as
+  // expected. This is tricky to ensure when MTE is enabled, so this test tries
+  // to exercise the relevant code on our MTE path.
+  for (scudo::uptr ClassId = 1U; ClassId <= 8; ClassId++) {
+    const scudo::uptr Size =
+        SizeClassMap::getSizeByClassId(ClassId) - scudo::Chunk::getHeaderSize();
+    if (Size < 8)
+      continue;
+    for (unsigned I = 0; I != Ptrs.size(); ++I) {
+      Ptrs[I] = Allocator->allocate(Size, Origin);
+      memset(Ptrs[I], 0xaa, Size);
+    }
+    for (unsigned I = 0; I != Ptrs.size(); ++I)
+      Allocator->deallocate(Ptrs[I], Origin, Size);
+    for (unsigned I = 0; I != Ptrs.size(); ++I) {
+      Ptrs[I] = Allocator->allocate(Size - 8, Origin);
+      memset(Ptrs[I], 0xbb, Size - 8);
+    }
+    for (unsigned I = 0; I != Ptrs.size(); ++I)
+      Allocator->deallocate(Ptrs[I], Origin, Size - 8);
+    for (unsigned I = 0; I != Ptrs.size(); ++I) {
+      Ptrs[I] = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true);
+      for (scudo::uptr J = 0; J < Size; ++J)
+        ASSERT_EQ((reinterpret_cast<char *>(Ptrs[I]))[J], 0);
+    }
+  }
+
+  Allocator->setOption(scudo::Option::ThreadDisableMemInit, 0);
+}
diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -13,10 +13,13 @@
 
 namespace scudo {
 
-enum class ThreadState : u8 {
-  NotInitialized = 0,
-  Initialized,
-  TornDown,
+struct ThreadState {
+  bool DisableMemInit : 1;
+  enum {
+    NotInitialized = 0,
+    Initialized,
+    TornDown,
+  } InitState : 2;
 };
 
 template <class Allocator> void teardownThread(void *Ptr);
@@ -36,13 +39,13 @@
   void unmapTestOnly() {}
 
   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) {
-    if (LIKELY(State != ThreadState::NotInitialized))
+    if (LIKELY(State.InitState != ThreadState::NotInitialized))
       return;
     initThread(Instance, MinimalInit);
   }
 
   ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) {
-    if (LIKELY(State == ThreadState::Initialized &&
+    if (LIKELY(State.InitState == ThreadState::Initialized &&
                !atomic_load(&Disabled, memory_order_acquire))) {
       *UnlockRequired = false;
       return &ThreadTSD;
     }
@@ -67,11 +70,15 @@
   }
 
   bool setOption(Option O, UNUSED sptr Value) {
+    if (O == Option::ThreadDisableMemInit)
+      State.DisableMemInit = Value;
     if (O == Option::MaxTSDsCount)
       return false;
     return true;
   }
 
+  bool getDisableMemInit() { return State.DisableMemInit; }
+
 private:
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
@@ -90,7 +97,7 @@
     CHECK_EQ(
         pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)),
         0);
     ThreadTSD.initLinkerInitialized(Instance);
-    State = ThreadState::Initialized;
+    State.InitState = ThreadState::Initialized;
     Instance->callPostInitCallback();
   }
 
@@ -126,7 +133,7 @@
       return;
   }
   TSDRegistryT::ThreadTSD.commitBack(Instance);
-  TSDRegistryT::State = ThreadState::TornDown;
+  TSDRegistryT::State.InitState = ThreadState::TornDown;
 }
 
 } // namespace scudo
diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -83,10 +83,14 @@
   bool setOption(Option O, sptr Value) {
     if (O == Option::MaxTSDsCount)
       return setNumberOfTSDs(static_cast<u32>(Value));
+    if (O == Option::ThreadDisableMemInit)
+      setDisableMemInit(Value);
     // Not supported by the TSD Registry, but not an error either.
     return true;
   }
 
+  bool getDisableMemInit() const { return *getTlsPtr() & 1; }
+
 private:
   ALWAYS_INLINE uptr *getTlsPtr() const {
 #if SCUDO_HAS_PLATFORM_TLS_SLOT
@@ -97,12 +101,15 @@
 #endif
   }
 
+  static_assert(alignof(TSD<Allocator>) >= 2, "");
+
   ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) {
-    *getTlsPtr() = reinterpret_cast<uptr>(CurrentTSD);
+    *getTlsPtr() &= 1;
+    *getTlsPtr() |= reinterpret_cast<uptr>(CurrentTSD);
   }
 
   ALWAYS_INLINE TSD<Allocator> *getCurrentTSD() {
-    return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr());
+    return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL);
   }
 
   bool setNumberOfTSDs(u32 N) {
@@ -131,6 +138,11 @@
     return true;
   }
 
+  void setDisableMemInit(bool B) {
+    *getTlsPtr() &= ~1ULL;
+    *getTlsPtr() |= B;
+  }
+
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
     if (LIKELY(Initialized))
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -179,6 +179,9 @@
     case M_MEMTAG_TUNING:
       option = scudo::Option::MemtagTuning;
      break;
+    case M_THREAD_DISABLE_MEM_INIT:
+      option = scudo::Option::ThreadDisableMemInit;
+      break;
     case M_CACHE_COUNT_MAX:
       option = scudo::Option::MaxCacheEntriesCount;
       break;
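
Usage sketch (not part of the patch): with the Scudo C wrappers providing the allocator, the new option is reached through mallopt() using the M_THREAD_DISABLE_MEM_INIT parameter added to scudo/interface.h above. The worker function below is hypothetical and only illustrates toggling the option around a batch of thread-local allocations.

#include <malloc.h>

// Matches the definition added to scudo/interface.h; guarded in case the
// toolchain's malloc.h does not provide it.
#ifndef M_THREAD_DISABLE_MEM_INIT
#define M_THREAD_DISABLE_MEM_INIT -103
#endif

// Hypothetical worker: skips automatic zero/pattern init (and, where possible,
// retag-on-free) for allocations made on this thread during the batch.
void processBatch(void) {
  mallopt(M_THREAD_DISABLE_MEM_INIT, 1); // 1: disable mem-init on this thread
  // ... allocate, fill and free scratch buffers here; calloc() still returns
  // zeroed memory, as exercised by the DisableMemInit test above ...
  mallopt(M_THREAD_DISABLE_MEM_INIT, 0); // 0: restore normal behavior
}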
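
The tsd_shared.h changes rely on a small pointer-tagging trick: because TSD objects are at least 2-byte aligned (the static_assert above), bit 0 of the per-thread TLS word is never used by the TSD pointer, so it can carry the DisableMemInit flag without a second slot. A minimal standalone illustration, not Scudo code, with TSDLike and TlsWord standing in for the real TSD type and platform TLS slot:

#include <cassert>
#include <cstdint>

struct alignas(8) TSDLike { int Dummy; }; // stand-in for the real TSD type

static thread_local uintptr_t TlsWord = 0; // stand-in for the platform TLS slot

void setCurrentTSD(TSDLike *TSD) {
  TlsWord &= 1;                                // preserve the flag bit
  TlsWord |= reinterpret_cast<uintptr_t>(TSD); // store the (aligned) pointer
}

TSDLike *getCurrentTSD() {
  return reinterpret_cast<TSDLike *>(TlsWord & ~uintptr_t(1)); // mask flag off
}

void setDisableMemInit(bool B) {
  TlsWord &= ~uintptr_t(1); // clear the flag bit
  TlsWord |= B;             // store the new value
}

bool getDisableMemInit() { return TlsWord & 1; }

int main() {
  static TSDLike T;
  setCurrentTSD(&T);
  setDisableMemInit(true);
  assert(getCurrentTSD() == &T); // the pointer is unaffected by the flag
  assert(getDisableMemInit());
  setDisableMemInit(false);
  assert(getCurrentTSD() == &T);
  assert(!getDisableMemInit());
  return 0;
}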