diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -78,6 +78,7 @@
 check_cxx_compiler_flag("-Werror -msse4.2" COMPILER_RT_HAS_MSSE4_2_FLAG)
 check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG)
 check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG)
+check_cxx_compiler_flag("-Werror -mcx16" COMPILER_RT_HAS_MCX16_FLAG)
 check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)
 
 if(NOT WIN32 AND NOT CYGWIN)
diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
--- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
@@ -19,6 +19,8 @@
 append_list_if(COMPILER_RT_HAS_FVISIBILITY_HIDDEN_FLAG -fvisibility=hidden SCUDO_CFLAGS)
 
+append_list_if(COMPILER_RT_HAS_MCX16_FLAG -mcx16 SCUDO_CFLAGS)
+
 # FIXME: find cleaner way to agree with GWPAsan flags
 append_list_if(COMPILER_RT_HAS_FNO_LTO_FLAG -fno-lto SCUDO_CFLAGS)
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -448,7 +448,7 @@
             computeOddEvenMaskForPointerMaybe(Options, BlockUptr, BlockSize);
         TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd);
       }
-      storeAllocationStackMaybe(Options, Ptr);
+      storePrimaryAllocationStackMaybe(Options, Ptr);
     } else {
       Block = addHeaderTag(Block);
       Ptr = addHeaderTag(Ptr);
@@ -462,8 +462,10 @@
   } else {
     Block = addHeaderTag(Block);
     Ptr = addHeaderTag(Ptr);
-    if (UNLIKELY(useMemoryTagging<Params>(Options)))
+    if (UNLIKELY(useMemoryTagging<Params>(Options))) {
       storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr));
+      storeSecondaryAllocationStackMaybe(Options, Ptr, Size);
+    }
   }
 
   Chunk::UnpackedHeader Header = {};
@@ -617,11 +619,15 @@
           (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize)) &
               Chunk::SizeOrUnusedBytesMask;
       Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader);
-      if (UNLIKELY(ClassId && useMemoryTagging<Params>(Options))) {
-        resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize,
-                          reinterpret_cast<uptr>(OldTaggedPtr) + NewSize,
-                          BlockEnd);
-        storeAllocationStackMaybe(Options, OldPtr);
+      if (UNLIKELY(useMemoryTagging<Params>(Options))) {
+        if (ClassId) {
+          resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize,
+                            reinterpret_cast<uptr>(OldTaggedPtr) + NewSize,
+                            BlockEnd);
+          storePrimaryAllocationStackMaybe(Options, OldPtr);
+        } else {
+          storeSecondaryAllocationStackMaybe(Options, OldPtr, NewSize);
+        }
       }
       return OldTaggedPtr;
     }
@@ -839,16 +845,102 @@
     return PrimaryT::getRegionInfoArraySize();
   }
 
+#ifdef __LP64__
+  const char *getRingBufferAddress() const {
+    return reinterpret_cast<const char *>(&RingBuffer);
+  }
+
+  static uptr getRingBufferSize() { return sizeof(RingBuffer); }
+#else
+  const char *getRingBufferAddress() const { return nullptr; }
+
+  static uptr getRingBufferSize() { return 0; }
+#endif
+
+  static const uptr MaxTraceSize = 64;
+
+  static void collectTraceMaybe(const StackDepot *Depot,
+                                uintptr_t (&Trace)[MaxTraceSize], u32 Hash) {
+    uptr RingPos, Size;
+    if (!Depot->find(Hash, &RingPos, &Size))
+      return;
+    for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I)
+      Trace[I] = (*Depot)[RingPos + I];
+  }
+
+  static void getSecondaryErrorInfo(struct scudo_error_info *ErrorInfo,
+                                    uintptr_t FaultAddr,
+                                    const StackDepot *Depot,
+                                    const char *RingBufferPtr) {
+    (void)ErrorInfo;
+    (void)FaultAddr;
+    (void)Depot;
+    (void)RingBufferPtr;
+#ifdef __LP64__
+    auto *RingBuffer =
+        reinterpret_cast<const SecondaryRingBuffer *>(RingBufferPtr);
+    uptr Pos = atomic_load_relaxed(&RingBuffer->Pos);
+    size_t NextErrorReport = 0;
+
+    for (uptr I = Pos - 1; I != Pos - 1 - SecondaryRingBuffer::NumEntries;
+         --I) {
+      auto *Entry = &RingBuffer->Entries[I % SecondaryRingBuffer::NumEntries];
+      uptr EntryPtr = untagPointer(atomic_load_relaxed(&Entry->Ptr));
+      uptr EntrySize = atomic_load_relaxed(&Entry->AllocationSize);
+      if (!EntryPtr || FaultAddr < EntryPtr - getPageSizeCached() ||
+          FaultAddr >= EntryPtr + EntrySize + getPageSizeCached())
+        continue;
+
+      u32 AllocationTrace = atomic_load_relaxed(&Entry->AllocationTrace);
+      u32 AllocationTid = atomic_load_relaxed(&Entry->AllocationTid);
+      u32 DeallocationTrace = Entry->DeallocationTrace;
+      u32 DeallocationTid = Entry->DeallocationTid;
+
+      auto *R = &ErrorInfo->reports[NextErrorReport++];
+      if (DeallocationTid)
+        R->error_type = USE_AFTER_FREE;
+      else if (FaultAddr < EntryPtr)
+        R->error_type = BUFFER_UNDERFLOW;
+      else
+        R->error_type = BUFFER_OVERFLOW;
+
+      R->allocation_address = EntryPtr;
+      R->allocation_size = EntrySize;
+      collectTraceMaybe(Depot, R->allocation_trace, AllocationTrace);
+      R->allocation_tid = AllocationTid;
+      collectTraceMaybe(Depot, R->deallocation_trace, DeallocationTrace);
+      R->deallocation_tid = DeallocationTid;
+
+      if (NextErrorReport ==
+          sizeof(ErrorInfo->reports) / sizeof(ErrorInfo->reports[0]))
+        return;
+    }
+#endif
+  }
+
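Aside (not part of the patch): getSecondaryErrorInfo() walks the ring newest-to-oldest, so when a secondary address has been reused the most recent record wins, and it tolerates faults up to a page on either side of the recorded allocation to catch accesses into the surrounding guard regions. A minimal single-threaded sketch of that lookup; Record, kNumEntries, and kPageSize are illustrative stand-ins, not Scudo names:

    #include <cstddef>
    #include <cstdint>

    struct Record {
      uintptr_t Ptr; // 0 means "invalid/unpublished"
      size_t Size;
    };

    constexpr size_t kNumEntries = 1024; // power of two, see comment below
    constexpr uintptr_t kPageSize = 4096;

    const Record *findRecord(const Record (&Ring)[kNumEntries], size_t Pos,
                             uintptr_t FaultAddr) {
      // Pos is a monotonically increasing write cursor, so Pos - 1 indexes
      // the newest record. Unsigned wraparound is harmless: kNumEntries
      // divides 2^64, so I % kNumEntries stays consistent across the wrap.
      for (size_t I = Pos - 1; I != Pos - 1 - kNumEntries; --I) {
        const Record &R = Ring[I % kNumEntries];
        // Accept faults up to a page on either side of the allocation.
        if (R.Ptr && FaultAddr >= R.Ptr - kPageSize &&
            FaultAddr < R.Ptr + R.Size + kPageSize)
          return &R; // newest match wins
      }
      return nullptr;
    }

The real function keeps scanning after a match so it can fill several candidate reports, newest allocation first.
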
   static void getErrorInfo(struct scudo_error_info *ErrorInfo,
                            uintptr_t FaultAddr, const char *DepotPtr,
-                           const char *RegionInfoPtr, const char *Memory,
-                           const char *MemoryTags, uintptr_t MemoryAddr,
-                           size_t MemorySize) {
+                           const char *RegionInfoPtr, const char *RingBufferPtr,
+                           const char *Memory, const char *MemoryTags,
+                           uintptr_t MemoryAddr, size_t MemorySize) {
     *ErrorInfo = {};
     if (!allocatorSupportsMemoryTagging<Params>() ||
         MemoryAddr + MemorySize < MemoryAddr)
       return;
 
+    auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr);
+    if (extractTag(FaultAddr) == 0)
+      getSecondaryErrorInfo(ErrorInfo, FaultAddr, Depot, RingBufferPtr);
+    else
+      getPrimaryErrorInfo(ErrorInfo, FaultAddr, Depot, RegionInfoPtr, Memory,
+                          MemoryTags, MemoryAddr, MemorySize);
+  }
+
+  static void getPrimaryErrorInfo(struct scudo_error_info *ErrorInfo,
+                                  uintptr_t FaultAddr, const StackDepot *Depot,
+                                  const char *RegionInfoPtr, const char *Memory,
+                                  const char *MemoryTags, uintptr_t MemoryAddr,
+                                  size_t MemorySize) {
     uptr UntaggedFaultAddr = untagPointer(FaultAddr);
     u8 FaultAddrTag = extractTag(FaultAddr);
     BlockInfo Info =
@@ -883,16 +975,6 @@
       return true;
     };
 
-    auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr);
-
-    auto MaybeCollectTrace = [&](uintptr_t(&Trace)[MaxTraceSize], u32 Hash) {
-      uptr RingPos, Size;
-      if (!Depot->find(Hash, &RingPos, &Size))
-        return;
-      for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I)
-        Trace[I] = (*Depot)[RingPos + I];
-    };
-
     size_t NextErrorReport = 0;
 
     // First, check for UAF.
@@ -908,10 +990,10 @@
           R->error_type = USE_AFTER_FREE;
           R->allocation_address = ChunkAddr;
           R->allocation_size = Header.SizeOrUnusedBytes;
-          MaybeCollectTrace(R->allocation_trace,
-                            Data[MemTagAllocationTraceIndex]);
+          collectTraceMaybe(Depot, R->allocation_trace,
+                            Data[MemTagAllocationTraceIndex]);
           R->allocation_tid = Data[MemTagAllocationTidIndex];
-          MaybeCollectTrace(R->deallocation_trace,
-                            Data[MemTagDeallocationTraceIndex]);
+          collectTraceMaybe(Depot, R->deallocation_trace,
+                            Data[MemTagDeallocationTraceIndex]);
           R->deallocation_tid = Data[MemTagDeallocationTidIndex];
         }
@@ -934,7 +1016,8 @@
       R->error_type =
          UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW;
       R->allocation_address = ChunkAddr;
       R->allocation_size = Header.SizeOrUnusedBytes;
-      MaybeCollectTrace(R->allocation_trace, Data[MemTagAllocationTraceIndex]);
+      collectTraceMaybe(Depot, R->allocation_trace,
+                        Data[MemTagAllocationTraceIndex]);
       R->allocation_tid = Data[MemTagAllocationTidIndex];
       return NextErrorReport ==
              sizeof(ErrorInfo->reports) / sizeof(ErrorInfo->reports[0]);
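Aside (not part of the patch): the primary path here and the secondary path earlier classify faults by the same rule, which is compact enough to state as code. A sketch; the enumerators mirror scudo_error_type:

    #include <cstdint>

    enum ErrorType { USE_AFTER_FREE, BUFFER_UNDERFLOW, BUFFER_OVERFLOW };

    // A fault on memory that has already been deallocated is a use-after-free;
    // otherwise the side of the chunk the fault falls on picks the type.
    ErrorType classify(uintptr_t FaultAddr, uintptr_t ChunkAddr,
                       bool Deallocated) {
      if (Deallocated)
        return USE_AFTER_FREE;
      return FaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW;
    }
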
@@ -988,8 +1071,6 @@
   static const sptr MemTagDeallocationTidIndex = 1;
   static const sptr MemTagPrevTagIndex = 2;
 
-  static const uptr MaxTraceSize = 64;
-
   u32 Cookie;
   u32 QuarantineMaxChunkSize;
 
@@ -1005,6 +1086,33 @@
   StackDepot Depot;
 
+#ifdef __LP64__
+  struct SecondaryRingBuffer {
+    // This part of the data structure must have 16-byte size and alignment
+    // because we modify it using 16-byte atomic instructions.
+    struct AtomicEntry {
+      alignas(16) atomic_uptr Ptr;
+      u32 DeallocationTrace;
+      u32 DeallocationTid;
+    };
+
+    struct Entry : AtomicEntry {
+      atomic_uptr AllocationSize;
+      atomic_u32 AllocationTrace;
+      atomic_u32 AllocationTid;
+    };
+
+    atomic_uptr Pos;
+#ifdef SCUDO_FUZZ
+    static const uptr NumEntries = 2;
+#else
+    static const uptr NumEntries = 1024;
+#endif
+    Entry Entries[NumEntries];
+  };
+  SecondaryRingBuffer RingBuffer;
+#endif
+
   // The following might get optimized out by the compiler.
   NOINLINE void performSanityChecks() {
     // Verify that the header offset field can hold the maximum offset. In the
@@ -1061,20 +1169,24 @@
   void quarantineOrDeallocateChunk(Options Options, void *Ptr,
                                    Chunk::UnpackedHeader *Header, uptr Size) {
     Chunk::UnpackedHeader NewHeader = *Header;
-    if (UNLIKELY(NewHeader.ClassId && useMemoryTagging<Params>(Options))) {
-      u8 PrevTag = extractTag(loadTag(reinterpret_cast<uptr>(Ptr)));
-      if (!TSDRegistry.getDisableMemInit()) {
-        uptr TaggedBegin, TaggedEnd;
-        const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe(
-            Options, reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)),
-            SizeClassMap::getSizeByClassId(NewHeader.ClassId));
-        // Exclude the previous tag so that immediate use after free is detected
-        // 100% of the time.
-        setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin,
-                     &TaggedEnd);
+    if (UNLIKELY(useMemoryTagging<Params>(Options))) {
+      if (NewHeader.ClassId) {
+        u8 PrevTag = extractTag(loadTag(reinterpret_cast<uptr>(Ptr)));
+        if (!TSDRegistry.getDisableMemInit()) {
+          uptr TaggedBegin, TaggedEnd;
+          const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe(
+              Options, reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)),
+              SizeClassMap::getSizeByClassId(NewHeader.ClassId));
+          // Exclude the previous tag so that immediate use after free is
+          // detected 100% of the time.
+          setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin,
+                       &TaggedEnd);
+        }
+        NewHeader.OriginOrWasZeroed = !TSDRegistry.getDisableMemInit();
+        storePrimaryDeallocationStackMaybe(Options, Ptr, PrevTag);
+      } else {
+        storeSecondaryDeallocationStackMaybe(Options, Ptr);
       }
-      NewHeader.OriginOrWasZeroed = !TSDRegistry.getDisableMemInit();
-      storeDeallocationStackMaybe(Options, Ptr, PrevTag);
     }
     // If the quarantine is disabled, the actual size of a chunk is 0 or larger
     // than the maximum allowed, we return a chunk directly to the backend.
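Aside (not part of the patch): the AtomicEntry layout above is what the new -mcx16 probe is for. On x86-64, clang with -mcx16 can lower the generic 16-byte __atomic builtins to cmpxchg16b instead of libatomic calls (other compilers may still route them through libatomic). A self-contained sketch, with illustrative names, of the torn-read-free publication this layout enables:

    #include <cstdint>

    // 16 bytes, 16-byte aligned: the whole record can be replaced with ONE
    // atomic store, so a reader sees either the old record or the new one,
    // never a mix of the two.
    struct alignas(16) Slot {
      uintptr_t Ptr; // 0 means "no valid record"
      uint32_t Trace;
      uint32_t Tid;
    };

    void publish(Slot *S, uintptr_t Ptr, uint32_t Trace, uint32_t Tid) {
      Slot Desired = {Ptr, Trace, Tid};
      __atomic_store(S, &Desired, __ATOMIC_RELAXED);
    }

    Slot snapshot(Slot *S) {
      Slot Out;
      __atomic_load(S, &Out, __ATOMIC_RELAXED);
      return Out;
    }

This is also why AtomicEntry carries the deallocation fields rather than the allocation ones: only the deallocation fields must change together with Ptr as one atomic unit, while the allocation fields are written while the entry is invalidated.
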
@@ -1127,7 +1239,7 @@
     return Offset + Chunk::getHeaderSize();
   }
 
-  void storeAllocationStackMaybe(Options Options, void *Ptr) {
+  void storePrimaryAllocationStackMaybe(Options Options, void *Ptr) {
     if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
       return;
     auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
@@ -1135,8 +1247,8 @@
     Ptr32[MemTagAllocationTidIndex] = getThreadID();
   }
 
-  void storeDeallocationStackMaybe(Options Options, void *Ptr,
-                                   uint8_t PrevTag) {
+  void storePrimaryDeallocationStackMaybe(Options Options, void *Ptr,
+                                          uint8_t PrevTag) {
     if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
       return;
 
@@ -1149,6 +1261,73 @@
     Ptr32[MemTagPrevTagIndex] = PrevTag;
   }
 
+  void storeSecondaryAllocationStackMaybe(Options Options, void *Ptr,
+                                          uptr Size) {
+    (void)Options;
+    (void)Ptr;
+    (void)Size;
+#ifdef __LP64__
+    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
+      return;
+
+    uptr Pos = atomic_fetch_add(&RingBuffer.Pos, 1, memory_order_relaxed);
+    typename SecondaryRingBuffer::Entry *Entry =
+        &RingBuffer.Entries[Pos % SecondaryRingBuffer::NumEntries];
+
+    // First invalidate our entry so that we don't attempt to interpret a
+    // partially written state in getSecondaryErrorInfo().
+    typename SecondaryRingBuffer::AtomicEntry NewEntry = {};
+    __atomic_store(
+        static_cast<typename SecondaryRingBuffer::AtomicEntry *>(Entry),
+        &NewEntry, memory_order_relaxed);
+    atomic_store_relaxed(&Entry->AllocationTrace, collectStackTrace());
+    atomic_store_relaxed(&Entry->AllocationTid, getThreadID());
+    atomic_store_relaxed(&Entry->AllocationSize, Size);
+
+    atomic_store_relaxed(&NewEntry.Ptr, reinterpret_cast<uptr>(Ptr));
+    __atomic_store(
+        static_cast<typename SecondaryRingBuffer::AtomicEntry *>(Entry),
+        &NewEntry, memory_order_relaxed);
+#endif
+  }
+
+  void storeSecondaryDeallocationStackMaybe(Options Options, void *Ptr) {
+    (void)Options;
+    (void)Ptr;
+#ifdef __LP64__
+    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
+      return;
+
+    u32 DeallocationTrace = collectStackTrace();
+    u32 DeallocationTid = getThreadID();
+
+    uptr Pos = atomic_load_relaxed(&RingBuffer.Pos);
+    for (uptr I = Pos - 1; I != Pos - 1 - SecondaryRingBuffer::NumEntries;
+         --I) {
+      auto *Entry = &RingBuffer.Entries[I % SecondaryRingBuffer::NumEntries];
+      uptr EntryPtr = atomic_load_relaxed(&Entry->Ptr);
+      if (EntryPtr != reinterpret_cast<uptr>(Ptr))
+        continue;
+
+      // We only take one shot at recording our stack trace. If we fail then it
+      // means that our pointer was overwritten by a concurrent allocation. In
+      // that case we wouldn't have anywhere to store the deallocation stack
+      // trace anyway.
+      typename SecondaryRingBuffer::AtomicEntry OldEntry = {};
+      atomic_store_relaxed(&OldEntry.Ptr, EntryPtr);
+
+      auto NewEntry = OldEntry;
+      NewEntry.DeallocationTrace = DeallocationTrace;
+      NewEntry.DeallocationTid = DeallocationTid;
+      __atomic_compare_exchange(
+          static_cast<typename SecondaryRingBuffer::AtomicEntry *>(Entry),
+          &OldEntry, &NewEntry, false, memory_order_relaxed,
+          memory_order_relaxed);
+      break;
+    }
+#endif
+  }
+
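Aside (not part of the patch): the free path above deliberately makes exactly one compare-exchange attempt, per the comment in the code. A self-contained sketch of that one-shot update; Slot and the helper are illustrative, not Scudo names:

    #include <cstdint>

    struct alignas(16) Slot {
      uintptr_t Ptr;      // which allocation this record describes
      uint32_t FreeTrace; // 0 while the allocation is still live
      uint32_t FreeTid;
    };

    // Returns false if the slot was recycled for a newer allocation in the
    // meantime; then there is nowhere left to store the deallocation info,
    // so the caller simply gives up -- exactly one attempt, as in the patch.
    bool tryMarkFreed(Slot *S, uintptr_t Ptr, uint32_t Trace, uint32_t Tid) {
      Slot Expected = {Ptr, 0, 0};      // a live record for Ptr
      Slot Desired = {Ptr, Trace, Tid}; // the same record, marked freed
      return __atomic_compare_exchange(S, &Expected, &Desired, /*weak=*/false,
                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    }
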
   uptr getStats(ScopedString *Str) {
     Primary.getStats(Str);
     Secondary.getStats(Str);
diff --git a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
--- a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
+++ b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
@@ -37,16 +37,24 @@
     StackDepot[i] = StackDepotBytes[i];
   }
 
-  std::string RegionInfoBytes = FDP.ConsumeRemainingBytesAsString();
+  std::string RegionInfoBytes =
+      FDP.ConsumeRandomLengthString(FDP.remaining_bytes());
   std::vector<char> RegionInfo(AllocatorT::getRegionInfoArraySize(), 0);
   for (size_t i = 0; i < RegionInfoBytes.length() && i < RegionInfo.size();
        ++i) {
     RegionInfo[i] = RegionInfoBytes[i];
   }
 
+  std::string RingBufferBytes = FDP.ConsumeRemainingBytesAsString();
+  std::vector<char> RingBuffer(AllocatorT::getRingBufferSize(), 0);
+  for (size_t i = 0; i < RingBufferBytes.length() && i < RingBuffer.size();
+       ++i) {
+    RingBuffer[i] = RingBufferBytes[i];
+  }
+
   scudo_error_info ErrorInfo;
   AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(),
-                           RegionInfo.data(), Memory, MemoryTags, MemoryAddr,
-                           MemorySize);
+                           RegionInfo.data(), RingBuffer.data(), Memory,
+                           MemoryTags, MemoryAddr, MemorySize);
   return 0;
 }
diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
--- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
+++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
@@ -72,9 +72,9 @@
 // pointer.
 void __scudo_get_error_info(struct scudo_error_info *error_info,
                             uintptr_t fault_addr, const char *stack_depot,
-                            const char *region_info, const char *memory,
-                            const char *memory_tags, uintptr_t memory_addr,
-                            size_t memory_size);
+                            const char *region_info, const char *ring_buffer,
+                            const char *memory, const char *memory_tags,
+                            uintptr_t memory_addr, size_t memory_size);
 
 enum scudo_error_type {
   UNKNOWN,
@@ -106,6 +106,9 @@
 const char *__scudo_get_region_info_addr();
 size_t __scudo_get_region_info_size();
 
+const char *__scudo_get_ring_buffer_addr();
+size_t __scudo_get_ring_buffer_size();
+
 #ifndef M_DECAY_TIME
 #define M_DECAY_TIME -100
 #endif
diff --git a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
--- a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
@@ -25,6 +25,8 @@
   list(APPEND SCUDO_UNITTEST_CFLAGS -DGWP_ASAN_HOOKS)
 endif()
 
+append_list_if(COMPILER_RT_HAS_MCX16_FLAG -mcx16 SCUDO_UNITTEST_CFLAGS)
+
 set(SCUDO_TEST_ARCH ${SCUDO_STANDALONE_SUPPORTED_ARCH})
 
 # gtests requires c++
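Aside (not part of the patch): the fuzzer change switches region info from ConsumeRemainingBytesAsString() to ConsumeRandomLengthString() so the new ring buffer blob can take whatever input remains. A minimal sketch of that splitting pattern (the buffer size here is made up; the real harness sizes buffers via the Allocator):

    #include <fuzzer/FuzzedDataProvider.h>

    #include <cstdint>
    #include <string>
    #include <vector>

    extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
      FuzzedDataProvider FDP(Data, Size);
      // Every consumer except the last takes a random-length prefix...
      std::string RegionInfoBytes =
          FDP.ConsumeRandomLengthString(FDP.remaining_bytes());
      // ...and the final consumer drains the rest.
      std::string RingBufferBytes = FDP.ConsumeRemainingBytesAsString();

      // Copy into fixed-size, zero-initialized buffers so short inputs still
      // yield fully-initialized structures, as the real fuzzer does.
      std::vector<char> RegionInfo(4096, 0);
      for (size_t I = 0; I < RegionInfoBytes.size() && I < RegionInfo.size();
           ++I)
        RegionInfo[I] = RegionInfoBytes[I];
      (void)RingBufferBytes;
      return 0;
    }

Note that ConsumeRandomLengthString() stops early at certain backslash escape sequences, so it does not necessarily consume remaining_bytes() characters even when that many are available.
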
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
--- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
@@ -48,12 +48,15 @@
 // TODO(kostyak): support both allocators.
 INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
 
-INTERFACE void __scudo_get_error_info(
-    struct scudo_error_info *error_info, uintptr_t fault_addr,
-    const char *stack_depot, const char *region_info, const char *memory,
-    const char *memory_tags, uintptr_t memory_addr, size_t memory_size) {
+INTERFACE void
+__scudo_get_error_info(struct scudo_error_info *error_info,
+                       uintptr_t fault_addr, const char *stack_depot,
+                       const char *region_info, const char *ring_buffer,
+                       const char *memory, const char *memory_tags,
+                       uintptr_t memory_addr, size_t memory_size) {
   Allocator.getErrorInfo(error_info, fault_addr, stack_depot, region_info,
-                         memory, memory_tags, memory_addr, memory_size);
+                         ring_buffer, memory, memory_tags, memory_addr,
+                         memory_size);
 }
 
 INTERFACE const char *__scudo_get_stack_depot_addr() {
@@ -72,4 +75,12 @@
   return Allocator.getRegionInfoArraySize();
 }
 
+INTERFACE const char *__scudo_get_ring_buffer_addr() {
+  return Allocator.getRingBufferAddress();
+}
+
+INTERFACE size_t __scudo_get_ring_buffer_size() {
+  return Allocator.getRingBufferSize();
+}
+
 #endif // SCUDO_ANDROID && _BIONIC
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn
--- a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn
@@ -53,13 +53,6 @@
     "wrappers_c_checks.h",
   ]
 
-  if (current_cpu == "arm" || current_cpu == "arm64") {
-    cflags += [ "-mcrc" ]
-  }
-  if (current_cpu == "x64") {
-    cflags += [ "-msse4.2" ]
-  }
-
   public_configs = [ ":scudo_config" ]
 }
 
@@ -86,7 +79,18 @@
     ".",
     "include",
   ]
+
+  cflags = []
   if (current_os == "android") {
-    cflags = [ "-fno-emulated-tls" ]
+    cflags += [ "-fno-emulated-tls" ]
+  }
+  if (current_cpu == "arm" || current_cpu == "arm64") {
+    cflags += [ "-mcrc" ]
+  }
+  if (current_cpu == "x64") {
+    cflags += [
+      "-msse4.2",
+      "-mcx16",
+    ]
   }
 }
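Aside (not part of the patch): a hedged sketch of how a crash handler might drive the new export. For simplicity it calls the __scudo_get_*_addr() accessors in-process; a real out-of-process handler (e.g. Android's debuggerd) would instead read those regions from the crashed process, and scudo/interface.h is assumed to be on the include path:

    #include <scudo/interface.h>

    #include <stddef.h>
    #include <stdint.h>

    void reportCrash(uintptr_t FaultAddr, const char *Memory,
                     const char *MemoryTags, uintptr_t MemoryAddr,
                     size_t MemorySize) {
      const char *Depot = __scudo_get_stack_depot_addr();
      const char *RegionInfo = __scudo_get_region_info_addr();
      const char *RingBuffer = __scudo_get_ring_buffer_addr(); // new export

      struct scudo_error_info ErrorInfo = {};
      __scudo_get_error_info(&ErrorInfo, FaultAddr, Depot, RegionInfo,
                             RingBuffer, Memory, MemoryTags, MemoryAddr,
                             MemorySize);
      // ErrorInfo.reports[] now holds any use-after-free / buffer-overflow /
      // buffer-underflow candidates with their allocation (and, if freed,
      // deallocation) stack traces and thread IDs.
    }
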