diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt --- a/compiler-rt/lib/CMakeLists.txt +++ b/compiler-rt/lib/CMakeLists.txt @@ -59,3 +59,9 @@ if(COMPILER_RT_BUILD_LIBFUZZER) compiler_rt_build_runtime(fuzzer) endif() + +# It doesn't normally make sense to build runtimes when a sanitizer is enabled, +# so we don't add_subdirectory the runtimes in that case. However, the opposite +# is true for fuzzers that exercise parts of the runtime. So we add the fuzzer +# directories explicitly here. +add_subdirectory(scudo/standalone/fuzz) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -18,6 +18,7 @@ #include "quarantine.h" #include "report.h" #include "secondary.h" +#include "stack_depot.h" #include "string_utils.h" #include "tsd.h" @@ -31,6 +32,14 @@ extern "C" inline void EmptyCallback() {} +#if SCUDO_ANDROID && __ANDROID_API__ == 10000 +// This function is not part of the NDK so it does not appear in any public +// header files. We only declare/use it when targeting the platform (i.e. API +// level 10000). +extern "C" size_t android_unsafe_frame_pointer_chase(scudo::uptr *buf, + size_t num_entries); +#endif + namespace scudo { enum class Option { ReleaseInterval }; @@ -142,6 +151,7 @@ Options.ZeroContents = getFlags()->zero_contents; Options.DeallocTypeMismatch = getFlags()->dealloc_type_mismatch; Options.DeleteSizeMismatch = getFlags()->delete_size_mismatch; + Options.TrackAllocationStacks = false; Options.QuarantineMaxChunkSize = static_cast(getFlags()->quarantine_max_chunk_size); @@ -221,6 +231,20 @@ return Ptr; } + NOINLINE u32 collectStackTrace() { +#if SCUDO_ANDROID && __ANDROID_API__ == 10000 + // Discard collectStackTrace() frame and allocator function frame. + constexpr uptr DiscardFrames = 2; + uptr Stack[MaxTraceSize + DiscardFrames]; + uptr Size = + android_unsafe_frame_pointer_chase(Stack, MaxTraceSize + DiscardFrames); + Size = Min(Size, MaxTraceSize + DiscardFrames); + return Depot.insert(Stack + Min(DiscardFrames, Size), Stack + Size); +#else + return 0; +#endif + } + NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, uptr Alignment = MinAlignment, bool ZeroContents = false) { @@ -359,9 +383,15 @@ PrevEnd = NextPage; TaggedPtr = reinterpret_cast(TaggedUserPtr); resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd); + if (Size) { + // Clear any stack metadata that may have previously been stored in + // the chunk data. + memset(TaggedPtr, 0, archMemoryTagGranuleSize()); + } } else { TaggedPtr = prepareTaggedChunk(Ptr, Size, BlockEnd); } + storeAllocationStackMaybe(Ptr); } else if (UNLIKELY(ZeroContents)) { // This condition is not necessarily unlikely, but since memset is // costly, we might as well mark it as such. @@ -515,10 +545,12 @@ : BlockEnd - (reinterpret_cast(OldPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - if (UNLIKELY(ClassId && useMemoryTagging())) + if (UNLIKELY(ClassId && useMemoryTagging())) { resizeTaggedChunk(reinterpret_cast(OldTaggedPtr) + OldSize, reinterpret_cast(OldTaggedPtr) + NewSize, BlockEnd); + storeAllocationStackMaybe(OldPtr); + } return OldTaggedPtr; } } @@ -689,6 +721,135 @@ void disableMemoryTagging() { Primary.disableMemoryTagging(); } + void setTrackAllocationStacks(bool Track) { + Options.TrackAllocationStacks = Track; + } + + const char *getStackDepotAddress() const { + return reinterpret_cast(&Depot); + } + + const char *getRegionInfoArrayAddress() const { + return Primary.getRegionInfoArrayAddress(); + } + + static uptr getRegionInfoArraySize() { + return PrimaryT::getRegionInfoArraySize(); + } + + static void getErrorInfo(struct scudo_error_info *ErrorInfo, + uintptr_t FaultAddr, const char *DepotPtr, + const char *RegionInfoPtr, const char *Memory, + const char *MemoryTags, uintptr_t MemoryAddr, + size_t MemorySize) { + *ErrorInfo = {}; + if (!PrimaryT::SupportsMemoryTagging || + MemoryAddr + MemorySize < MemoryAddr) + return; + + uptr UntaggedFaultAddr = untagPointer(FaultAddr); + u8 FaultAddrTag = extractTag(FaultAddr); + BlockInfo Info = + PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr); + + auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool { + if (Addr < MemoryAddr || + Addr + archMemoryTagGranuleSize() < Addr || + Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize) + return false; + *Data = &Memory[Addr - MemoryAddr]; + *Tag = static_cast( + MemoryTags[(Addr - MemoryAddr) / archMemoryTagGranuleSize()]); + return true; + }; + + auto ReadBlock = [&](uptr Addr, uptr *ChunkAddr, + Chunk::UnpackedHeader *Header, const u32 **Data, + u8 *Tag) { + const char *BlockBegin; + u8 BlockBeginTag; + if (!GetGranule(Addr, &BlockBegin, &BlockBeginTag)) + return false; + uptr ChunkOffset = getChunkOffsetFromBlock(BlockBegin); + *ChunkAddr = Addr + ChunkOffset; + + const char *ChunkBegin; + if (!GetGranule(*ChunkAddr, &ChunkBegin, Tag)) + return false; + *Header = *reinterpret_cast( + ChunkBegin - Chunk::getHeaderSize()); + *Data = reinterpret_cast(ChunkBegin); + return true; + }; + + auto *Depot = reinterpret_cast(DepotPtr); + + auto MaybeCollectTrace = [&](uintptr_t(&Trace)[MaxTraceSize], u32 Hash) { + uptr RingPos, Size; + if (!Depot->find(Hash, &RingPos, &Size)) + return; + for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I) + Trace[I] = (*Depot)[RingPos + I]; + }; + + size_t NextErrorReport = 0; + + // First, check for UAF. + { + uptr ChunkAddr; + Chunk::UnpackedHeader Header; + const u32 *Data; + uint8_t Tag; + if (ReadBlock(Info.BlockBegin, &ChunkAddr, &Header, &Data, &Tag) && + Header.State != Chunk::State::Allocated && + Data[MemTagPrevTagIndex] == FaultAddrTag) { + auto *R = &ErrorInfo->reports[NextErrorReport++]; + R->error_type = USE_AFTER_FREE; + R->allocation_address = ChunkAddr; + R->allocation_size = Header.SizeOrUnusedBytes; + MaybeCollectTrace(R->allocation_trace, + Data[MemTagAllocationTraceIndex]); + R->allocation_tid = Data[MemTagAllocationTidIndex]; + MaybeCollectTrace(R->deallocation_trace, + Data[MemTagDeallocationTraceIndex]); + R->deallocation_tid = Data[MemTagDeallocationTidIndex]; + } + } + + auto CheckOOB = [&](uptr BlockAddr) { + if (BlockAddr < Info.RegionBegin || BlockAddr >= Info.RegionEnd) + return false; + + uptr ChunkAddr; + Chunk::UnpackedHeader Header; + const u32 *Data; + uint8_t Tag; + if (!ReadBlock(BlockAddr, &ChunkAddr, &Header, &Data, &Tag) || + Header.State != Chunk::State::Allocated || Tag != FaultAddrTag) + return false; + + auto *R = &ErrorInfo->reports[NextErrorReport++]; + R->error_type = + UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW; + R->allocation_address = ChunkAddr; + R->allocation_size = Header.SizeOrUnusedBytes; + MaybeCollectTrace(R->allocation_trace, Data[MemTagAllocationTraceIndex]); + R->allocation_tid = Data[MemTagAllocationTidIndex]; + return NextErrorReport == + sizeof(ErrorInfo->reports) / sizeof(ErrorInfo->reports[0]); + }; + + if (CheckOOB(Info.BlockBegin)) + return; + + // Check for OOB in the 30 surrounding blocks. Beyond that we are likely to + // hit false positives. + for (int I = 1; I != 16; ++I) + if (CheckOOB(Info.BlockBegin + I * Info.BlockSize) || + CheckOOB(Info.BlockBegin - I * Info.BlockSize)) + return; + } + private: using SecondaryT = typename Params::Secondary; typedef typename PrimaryT::SizeClassMap SizeClassMap; @@ -708,6 +869,26 @@ static const u32 BlockMarker = 0x44554353U; + // These are indexes into an "array" of 32-bit values that store information + // inline with a chunk that is relevant to diagnosing memory tag faults, where + // 0 corresponds to the address of the user memory. This means that negative + // indexes may be used to store information about allocations, while positive + // indexes may only be used to store information about deallocations, because + // the user memory is in use until it has been deallocated. The smallest index + // that may be used is -2, which corresponds to 8 bytes before the user + // memory, because the chunk header size is 8 bytes and in allocators that + // support memory tagging the minimum alignment is at least the tag granule + // size (16 on aarch64), and the largest index that may be used is 3 because + // we are only guaranteed to have at least a granule's worth of space in the + // user memory. + static const sptr MemTagAllocationTraceIndex = -2; + static const sptr MemTagAllocationTidIndex = -1; + static const sptr MemTagDeallocationTraceIndex = 0; + static const sptr MemTagDeallocationTidIndex = 1; + static const sptr MemTagPrevTagIndex = 2; + + static const uptr MaxTraceSize = 64; + GlobalStats Stats; TSDRegistryT TSDRegistry; PrimaryT Primary; @@ -721,6 +902,7 @@ u8 ZeroContents : 1; // zero_contents u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch u8 DeleteSizeMismatch : 1; // delete_size_mismatch + u8 TrackAllocationStacks : 1; u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size } Options; @@ -728,6 +910,8 @@ gwp_asan::GuardedPoolAllocator GuardedAlloc; #endif // GWP_ASAN_HOOKS + StackDepot Depot; + // The following might get optimized out by the compiler. NOINLINE void performSanityChecks() { // Verify that the header offset field can hold the maximum offset. In the @@ -787,8 +971,10 @@ uptr Size) { Chunk::UnpackedHeader NewHeader = *Header; if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) { + u8 PrevTag = extractTag(loadTag(reinterpret_cast(Ptr))); uptr TaggedBegin, TaggedEnd; setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd); + storeDeallocationStackMaybe(Ptr, PrevTag); } // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. @@ -824,13 +1010,39 @@ bool getChunkFromBlock(uptr Block, uptr *Chunk, Chunk::UnpackedHeader *Header) { - u32 Offset = 0; - if (reinterpret_cast(Block)[0] == BlockMarker) - Offset = reinterpret_cast(Block)[1]; - *Chunk = Block + Offset + Chunk::getHeaderSize(); + *Chunk = + Block + getChunkOffsetFromBlock(reinterpret_cast(Block)); return Chunk::isValid(Cookie, reinterpret_cast(*Chunk), Header); } + static uptr getChunkOffsetFromBlock(const char *Block) { + u32 Offset = 0; + if (reinterpret_cast(Block)[0] == BlockMarker) + Offset = reinterpret_cast(Block)[1]; + return Offset + Chunk::getHeaderSize(); + } + + void storeAllocationStackMaybe(void *Ptr) { + if (!UNLIKELY(Options.TrackAllocationStacks)) + return; + auto *Ptr32 = reinterpret_cast(Ptr); + Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(); + Ptr32[MemTagAllocationTidIndex] = getThreadID(); + } + + void storeDeallocationStackMaybe(void *Ptr, uint8_t PrevTag) { + if (!UNLIKELY(Options.TrackAllocationStacks)) + return; + + // Disable tag checks here so that we don't need to worry about zero sized + // allocations. + ScopedDisableMemoryTagChecks x; + auto *Ptr32 = reinterpret_cast(Ptr); + Ptr32[MemTagDeallocationTraceIndex] = collectStackTrace(); + Ptr32[MemTagDeallocationTidIndex] = getThreadID(); + Ptr32[MemTagPrevTagIndex] = PrevTag; + } + uptr getStats(ScopedString *Str) { Primary.getStats(Str); Secondary.getStats(Str); diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -133,6 +133,8 @@ u64 getMonotonicTime(); +u32 getThreadID(); + // Our randomness gathering function is limited to 256 bytes to ensure we get // as many bytes as requested, and avoid interruptions (on Linux). constexpr uptr MaxRandomLength = 256U; @@ -173,6 +175,13 @@ void setAbortMessage(const char *Message); +struct BlockInfo { + uptr BlockBegin; + uptr BlockSize; + uptr RegionBegin; + uptr RegionEnd; +}; + } // namespace scudo #endif // SCUDO_COMMON_H_ diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -170,6 +170,8 @@ u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } +u32 getThreadID() { return 0; } + bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { static_assert(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN, ""); if (UNLIKELY(!Buffer || !Length || Length > MaxRandomLength)) diff --git a/compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt @@ -0,0 +1,12 @@ +if (LLVM_USE_SANITIZE_COVERAGE) + add_executable(get_error_info_fuzzer + get_error_info_fuzzer.cpp) + set_target_properties( + get_error_info_fuzzer PROPERTIES FOLDER "Fuzzers") + target_compile_options( + get_error_info_fuzzer PRIVATE -fsanitize=fuzzer) + set_target_properties( + get_error_info_fuzzer PROPERTIES LINK_FLAGS -fsanitize=fuzzer) + target_include_directories( + get_error_info_fuzzer PRIVATE .. ../include) +endif() diff --git a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp @@ -0,0 +1,48 @@ +//===-- get_error_info_fuzzer.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SCUDO_FUZZ +#include "allocator_config.h" +#include "combined.h" + +#include + +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { + using AllocatorT = scudo::Allocator; + FuzzedDataProvider FDP(Data, Size); + + uintptr_t FaultAddr = FDP.ConsumeIntegral(); + uintptr_t MemoryAddr = FDP.ConsumeIntegral(); + + std::string MemoryAndTags = FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); + const char *Memory = MemoryAndTags.c_str(); + // Assume 16-byte alignment. + size_t MemorySize = (MemoryAndTags.length() / 17) * 16; + const char *MemoryTags = Memory + MemorySize; + + std::string StackDepotBytes = FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); + std::vector StackDepot(sizeof(scudo::StackDepot), 0); + for (size_t i = 0; i < StackDepotBytes.length() && i < StackDepot.size(); ++i) { + StackDepot[i] = StackDepotBytes[i]; + } + + std::string RegionInfoBytes = FDP.ConsumeRemainingBytesAsString(); + std::vector RegionInfo(AllocatorT::getRegionInfoArraySize(), 0); + for (size_t i = 0; i < RegionInfoBytes.length() && i < RegionInfo.size(); ++i) { + RegionInfo[i] = RegionInfoBytes[i]; + } + + scudo_error_info ErrorInfo; + AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(), + RegionInfo.data(), Memory, MemoryTags, MemoryAddr, + MemorySize); + return 0; +} diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h --- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h +++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h @@ -9,6 +9,8 @@ #ifndef SCUDO_INTERFACE_H_ #define SCUDO_INTERFACE_H_ +#include + extern "C" { __attribute__((weak)) const char *__scudo_default_options(); @@ -22,6 +24,87 @@ typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); +// Determine the likely cause of a tag check fault or other memory protection +// error on a system with memory tagging support. The results are returned via +// the error_info data structure. Up to three possible causes are returned in +// the reports array, in decreasing order of probability. The remaining elements +// of reports are zero-initialized. +// +// This function may be called from a different process from the one that +// crashed. In this case, various data structures must be copied from the +// crashing process to the process that analyzes the crash. +// +// This interface is not guaranteed to be stable and may change at any time. +// Furthermore, the version of scudo in the crashing process must be the same as +// the version in the process that analyzes the crash. +// +// fault_addr is the fault address. On aarch64 this is available in the system +// register FAR_ELx, or far_context.far in an upcoming release of the Linux +// kernel. This address must include the pointer tag; note that the kernel +// strips the tag from the fields siginfo.si_addr and sigcontext.fault_address, +// so these addresses are not suitable to be passed as fault_addr. +// +// stack_depot is a pointer to the stack depot data structure, which may be +// obtained by calling the function __scudo_get_stack_depot_addr() in the +// crashing process. The size of the stack depot is available by calling the +// function __scudo_get_stack_depot_size(). +// +// region_info is a pointer to the region info data structure, which may be +// obtained by calling the function __scudo_get_region_info_addr() in the +// crashing process. The size of the region info is available by calling the +// function __scudo_get_region_info_size(). +// +// memory is a pointer to a region of memory surrounding the fault address. +// The more memory available via this pointer, the more likely it is that the +// function will be able to analyze a crash correctly. It is recommended to +// provide an amount of memory equal to 16 * the primary allocator's largest +// size class either side of the fault address. +// +// memory_tags is a pointer to an array of memory tags for the memory accessed +// via memory. Each byte of this array corresponds to a region of memory of size +// equal to the architecturally defined memory tag granule size (16 on aarch64). +// +// memory_addr is the start address of memory in the crashing process's address +// space. +// +// memory_size is the size of the memory region referred to by the memory +// pointer. +void __scudo_get_error_info(struct scudo_error_info *error_info, + uintptr_t fault_addr, const char *stack_depot, + const char *region_info, const char *memory, + const char *memory_tags, uintptr_t memory_addr, + size_t memory_size); + +enum scudo_error_type { + UNKNOWN, + USE_AFTER_FREE, + BUFFER_OVERFLOW, + BUFFER_UNDERFLOW, +}; + +struct scudo_error_report { + enum scudo_error_type error_type; + + uintptr_t allocation_address; + uintptr_t allocation_size; + + uint32_t allocation_tid; + uintptr_t allocation_trace[64]; + + uint32_t deallocation_tid; + uintptr_t deallocation_trace[64]; +}; + +struct scudo_error_info { + struct scudo_error_report reports[3]; +}; + +const char *__scudo_get_stack_depot_addr(); +size_t __scudo_get_stack_depot_size(); + +const char *__scudo_get_region_info_addr(); +size_t __scudo_get_region_info_size(); + } // extern "C" #endif // SCUDO_INTERFACE_H_ diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -139,6 +139,14 @@ return static_cast(CPU_COUNT(&CPUs)); } +u32 getThreadID() { +#if SCUDO_ANDROID + return static_cast(gettid()); +#else + return static_cast(syscall(SYS_gettid)); +#endif +} + // Blocking is possibly unused if the getrandom block is not compiled in. bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { if (!Buffer || !Length || Length > MaxRandomLength) diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -21,11 +21,39 @@ namespace scudo { -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(SCUDO_FUZZ) inline constexpr bool archSupportsMemoryTagging() { return true; } inline constexpr uptr archMemoryTagGranuleSize() { return 16; } +inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); } + +inline uint8_t extractTag(uptr Ptr) { + return (Ptr >> 56) & 0xf; +} + +#else + +inline constexpr bool archSupportsMemoryTagging() { return false; } + +inline uptr archMemoryTagGranuleSize() { + UNREACHABLE("memory tagging not supported"); +} + +inline uptr untagPointer(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +inline uint8_t extractTag(uptr Ptr) { + (void)Ptr; + UNREACHABLE("memory tagging not supported"); +} + +#endif + +#if defined(__aarch64__) + inline bool systemSupportsMemoryTagging() { #if defined(ANDROID_EXPERIMENTAL_MTE) return getauxval(AT_HWCAP2) & HWCAP2_MTE; @@ -51,7 +79,19 @@ __asm__ __volatile__(".arch_extension mte; msr tco, #0"); } -inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); } +class ScopedDisableMemoryTagChecks { + size_t PrevTCO; + + public: + ScopedDisableMemoryTagChecks() { + __asm__ __volatile__(".arch_extension mte; mrs %0, tco; msr tco, #1" + : "=r"(PrevTCO)); + } + + ~ScopedDisableMemoryTagChecks() { + __asm__ __volatile__(".arch_extension mte; msr tco, %0" : : "r"(PrevTCO)); + } +}; inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, uptr *TaggedEnd) { @@ -154,10 +194,6 @@ : "memory"); } -inline uptr tagPointer(uptr UntaggedPtr, uptr Tag) { - return UntaggedPtr | (Tag & (0xfUL << 56)); -} - inline uptr loadTag(uptr Ptr) { uptr TaggedPtr = Ptr; __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" @@ -169,8 +205,6 @@ #else -inline constexpr bool archSupportsMemoryTagging() { return false; } - inline bool systemSupportsMemoryTagging() { UNREACHABLE("memory tagging not supported"); } @@ -179,10 +213,6 @@ UNREACHABLE("memory tagging not supported"); } -inline uptr archMemoryTagGranuleSize() { - UNREACHABLE("memory tagging not supported"); -} - inline void disableMemoryTagChecksTestOnly() { UNREACHABLE("memory tagging not supported"); } @@ -191,10 +221,9 @@ UNREACHABLE("memory tagging not supported"); } -inline uptr untagPointer(uptr Ptr) { - (void)Ptr; - UNREACHABLE("memory tagging not supported"); -} +struct ScopedDisableMemoryTagChecks { + ScopedDisableMemoryTagChecks() {} +}; inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, uptr *TaggedEnd) { diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -206,6 +206,15 @@ bool useMemoryTagging() { return false; } void disableMemoryTagging() {} + const char *getRegionInfoArrayAddress() const { return nullptr; } + static uptr getRegionInfoArraySize() { return 0; } + + static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + (void)RegionInfoData; + (void)Ptr; + return {}; + } + private: static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr RegionSize = 1UL << RegionSizeLog; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -209,6 +209,58 @@ } void disableMemoryTagging() { UseMemoryTagging = false; } + const char *getRegionInfoArrayAddress() const { + return reinterpret_cast(RegionInfoArray); + } + + static uptr getRegionInfoArraySize() { + return sizeof(RegionInfoArray); + } + + static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + const RegionInfo *RegionInfoArray = + reinterpret_cast(RegionInfoData); + uptr ClassId; + uptr MinDistance = -1UL; + for (uptr I = 0; I != NumClasses; ++I) { + if (I == SizeClassMap::BatchClassId) + continue; + uptr Begin = RegionInfoArray[I].RegionBeg; + uptr End = Begin + RegionInfoArray[I].AllocatedUser; + if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) + continue; + uptr RegionDistance; + if (Begin <= Ptr) { + if (Ptr < End) + RegionDistance = 0; + else + RegionDistance = Ptr - End; + } else { + RegionDistance = Begin - Ptr; + } + + if (RegionDistance < MinDistance) { + MinDistance = RegionDistance; + ClassId = I; + } + } + + BlockInfo B = {}; + if (MinDistance <= 8192) { + B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; + B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser; + B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); + B.BlockBegin = + B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) * + sptr(B.BlockSize)); + while (B.BlockBegin < B.RegionBegin) + B.BlockBegin += B.BlockSize; + while (B.RegionEnd < B.BlockBegin + B.BlockSize) + B.BlockBegin -= B.BlockSize; + } + return B; + } + private: static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; @@ -231,7 +283,7 @@ u64 LastReleaseAtNs; }; - struct alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo { + struct UnpaddedRegionInfo { HybridMutex Mutex; SinglyLinkedList FreeList; RegionStats Stats; @@ -244,13 +296,17 @@ MapPlatformData Data; ReleaseToOsInfo ReleaseInfo; }; + struct RegionInfo : UnpaddedRegionInfo { + char Padding[SCUDO_CACHE_LINE_SIZE - + (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)]; + }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr PrimaryBase; MapPlatformData Data; atomic_s32 ReleaseToOsIntervalMs; bool UseMemoryTagging; - RegionInfo RegionInfoArray[NumClasses]; + alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; RegionInfo *getRegionInfo(uptr ClassId) { DCHECK_LT(ClassId, NumClasses); diff --git a/compiler-rt/lib/scudo/standalone/stack_depot.h b/compiler-rt/lib/scudo/standalone/stack_depot.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/stack_depot.h @@ -0,0 +1,144 @@ +//===-- stack_depot.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_STACK_DEPOT_H_ +#define SCUDO_STACK_DEPOT_H_ + +#include "atomic_helpers.h" +#include "mutex.h" + +namespace scudo { + +class MurMur2HashBuilder { + static const u32 M = 0x5bd1e995; + static const u32 Seed = 0x9747b28c; + static const u32 R = 24; + u32 H; + + public: + explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; } + void add(u32 K) { + K *= M; + K ^= K >> R; + K *= M; + H *= M; + H ^= K; + } + u32 get() { + u32 X = H; + X ^= X >> 13; + X *= M; + X ^= X >> 15; + return X; + } +}; + +class StackDepot { + HybridMutex RingEndMu; + u32 RingEnd; + + // This data structure stores a stack trace for each allocation and + // deallocation when stack trace recording is enabled, that may be looked up + // using a hash of the stack trace. The lower bits of the hash are an index + // into the Tab array, which stores an index into the Ring array where the + // stack traces are stored. As the name implies, Ring is a ring buffer, so a + // stack trace may wrap around to the start of the array. + // + // Each stack trace in Ring is prefixed by a stack trace marker consisting of + // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames + // and stack trace markers in the case where instruction pointers are 4-byte + // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the + // size of the stack trace in bits 33-63. + // + // The insert() function is potentially racy in its accesses to the Tab and + // Ring arrays, but find() is resilient to races in the sense that, barring + // hash collisions, it will either return the correct stack trace or no stack + // trace at all, even if two instances of insert() raced with one another. + // This is achieved by re-checking the hash of the stack trace before + // returning the trace. + +#ifdef SCUDO_FUZZ + // Use smaller table sizes for fuzzing in order to reduce input size. + static const uptr TabBits = 4; +#else + static const uptr TabBits = 16; +#endif + static const uptr TabSize = 1 << TabBits; + static const uptr TabMask = TabSize - 1; + atomic_u32 Tab[TabSize]; + +#ifdef SCUDO_FUZZ + static const uptr RingBits = 4; +#else + static const uptr RingBits = 19; +#endif + static const uptr RingSize = 1 << RingBits; + static const uptr RingMask = RingSize - 1; + atomic_u64 Ring[RingSize]; + +public: + // Insert hash of the stack trace [Begin, End) into the stack depot, and + // return the hash. + u32 insert(uptr *Begin, uptr *End) { + MurMur2HashBuilder B; + for (uptr *I = Begin; I != End; ++I) + B.add(u32(*I) >> 2); + u32 Hash = B.get(); + + u32 Pos = Hash & TabMask; + u32 RingPos = atomic_load_relaxed(&Tab[Pos]); + u64 Entry = atomic_load_relaxed(&Ring[RingPos]); + u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1; + if (Entry == Id) + return Hash; + + ScopedLock Lock(RingEndMu); + RingPos = RingEnd; + atomic_store_relaxed(&Tab[Pos], RingPos); + atomic_store_relaxed(&Ring[RingPos], Id); + for (uptr *I = Begin; I != End; ++I) { + RingPos = (RingPos + 1) & RingMask; + atomic_store_relaxed(&Ring[RingPos], *I); + } + RingEnd = (RingPos + 1) & RingMask; + return Hash; + } + + // Look up a stack trace by hash. Returns true if successful. The trace may be + // accessed via operator[] passing indexes between *RingPosPtr and + // *RingPosPtr + *SizePtr. + bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const { + u32 Pos = Hash & TabMask; + u32 RingPos = atomic_load_relaxed(&Tab[Pos]); + if (RingPos >= RingSize) + return false; + u64 Entry = atomic_load_relaxed(&Ring[RingPos]); + u64 HashWithTagBit = (u64(Hash) << 1) | 1; + if ((Entry & 0x1ffffffff) != HashWithTagBit) + return false; + u32 Size = Entry >> 33; + if (Size >= RingSize) + return false; + *RingPosPtr = (RingPos + 1) & RingMask; + *SizePtr = Size; + MurMur2HashBuilder B; + for (uptr I = 0; I != Size; ++I) { + RingPos = (RingPos + 1) & RingMask; + B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2); + } + return B.get() == Hash; + } + + u64 operator[](uptr RingPos) const { + return atomic_load_relaxed(&Ring[RingPos & RingMask]); + } +}; + +} // namespace scudo + +#endif // SCUDO_STACK_DEPOT_H_ diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -219,4 +219,13 @@ SCUDO_ALLOCATOR.disableMemoryTagging(); } +// Sets whether scudo records stack traces and other metadata for allocations +// and deallocations. This function only has an effect if the allocator and +// hardware support memory tagging. The program must be single threaded at the +// point when the function is called. +INTERFACE WEAK void +SCUDO_PREFIX(malloc_set_track_allocation_stacks)(int track) { + SCUDO_ALLOCATOR.setTrackAllocationStacks(track); +} + } // extern "C" diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp --- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp +++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp @@ -48,4 +48,28 @@ // TODO(kostyak): support both allocators. INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); } +INTERFACE void __scudo_get_error_info( + struct scudo_error_info *error_info, uintptr_t fault_addr, + const char *stack_depot, const char *region_info, const char *memory, + const char *memory_tags, uintptr_t memory_addr, size_t memory_size) { + Allocator.getErrorInfo(error_info, fault_addr, stack_depot, region_info, + memory, memory_tags, memory_addr, memory_size); +} + +INTERFACE const char *__scudo_get_stack_depot_addr() { + return Allocator.getStackDepotAddress(); +} + +INTERFACE size_t __scudo_get_stack_depot_size() { + return sizeof(scudo::StackDepot); +} + +INTERFACE const char *__scudo_get_region_info_addr() { + return Allocator.getRegionInfoArrayAddress(); +} + +INTERFACE size_t __scudo_get_region_info_size() { + return Allocator.getRegionInfoArraySize(); +} + #endif // SCUDO_ANDROID && _BIONIC