diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp
--- a/compiler-rt/lib/hwasan/hwasan.cpp
+++ b/compiler-rt/lib/hwasan/hwasan.cpp
@@ -576,6 +576,12 @@
   return t->GenerateRandomTag();
 }
 
+void __hwasan_record_frame_record(u64 frame_record_info) {
+  Thread *t = GetCurrentThread();
+  if (t)
+    t->stack_allocations()->push(frame_record_info);
+}
+
 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
diff --git a/compiler-rt/lib/hwasan/hwasan_interface_internal.h b/compiler-rt/lib/hwasan/hwasan_interface_internal.h
--- a/compiler-rt/lib/hwasan/hwasan_interface_internal.h
+++ b/compiler-rt/lib/hwasan/hwasan_interface_internal.h
@@ -168,6 +168,9 @@
 SANITIZER_INTERFACE_ATTRIBUTE
 void __hwasan_print_memory_usage();
 
+SANITIZER_INTERFACE_ATTRIBUTE
+void __hwasan_record_frame_record(u64 frame_record_info);
+
 SANITIZER_INTERFACE_ATTRIBUTE
 void *__hwasan_memcpy(void *dst, const void *src, uptr size);
 SANITIZER_INTERFACE_ATTRIBUTE
diff --git a/compiler-rt/test/hwasan/TestCases/deep-recursion.c b/compiler-rt/test/hwasan/TestCases/deep-recursion.c
--- a/compiler-rt/test/hwasan/TestCases/deep-recursion.c
+++ b/compiler-rt/test/hwasan/TestCases/deep-recursion.c
@@ -5,6 +5,15 @@
 // RUN: %env_hwasan_opts=stack_history_size=5 not %run %t 2>&1 | FileCheck %s --check-prefix=D5
 // RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=DEFAULT
 
+// Run the same tests as above, but using the __hwasan_record_frame_record libcall.
+// The output should be exactly the same.
+// RUN: %clang_hwasan -O1 %s -o %t.libcall -mllvm -hwasan-record-stack-history-with-calls=1
+// RUN: %env_hwasan_opts=stack_history_size=1 not %run %t.libcall 2>&1 | FileCheck %s --check-prefix=D1
+// RUN: %env_hwasan_opts=stack_history_size=2 not %run %t.libcall 2>&1 | FileCheck %s --check-prefix=D2
+// RUN: %env_hwasan_opts=stack_history_size=3 not %run %t.libcall 2>&1 | FileCheck %s --check-prefix=D3
+// RUN: %env_hwasan_opts=stack_history_size=5 not %run %t.libcall 2>&1 | FileCheck %s --check-prefix=D5
+// RUN: not %run %t.libcall 2>&1 | FileCheck %s --check-prefix=DEFAULT
+
 // REQUIRES: stable-runtime
 
 // Stack histories are currently not recorded on x86.
diff --git a/compiler-rt/test/hwasan/TestCases/stack-history-length.c b/compiler-rt/test/hwasan/TestCases/stack-history-length.c
--- a/compiler-rt/test/hwasan/TestCases/stack-history-length.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-history-length.c
@@ -2,6 +2,12 @@
 // RUN: %env_hwasan_opts=stack_history_size=2048 not %run %t 2045 2>&1 | FileCheck %s --check-prefix=YES
 // RUN: %env_hwasan_opts=stack_history_size=2048 not %run %t 2047 2>&1 | FileCheck %s --check-prefix=NO
 
+// Run the same tests as above, but using the __hwasan_record_frame_record libcall.
+// The output should be exactly the same.
+// RUN: %clang_hwasan -O1 %s -o %t.libcall -mllvm -hwasan-record-stack-history-with-calls=1
+// RUN: %env_hwasan_opts=stack_history_size=2048 not %run %t.libcall 2045 2>&1 | FileCheck %s --check-prefix=YES
+// RUN: %env_hwasan_opts=stack_history_size=2048 not %run %t.libcall 2047 2>&1 | FileCheck %s --check-prefix=NO
+
 // REQUIRES: stable-runtime
 
 // Stack histories are currently not recorded on x86.
diff --git a/compiler-rt/test/hwasan/TestCases/stack-uar.c b/compiler-rt/test/hwasan/TestCases/stack-uar.c
--- a/compiler-rt/test/hwasan/TestCases/stack-uar.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-uar.c
@@ -2,6 +2,10 @@
 // RUN: %clang_hwasan -g %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clang_hwasan -g %s -o %t && not %env_hwasan_opts=symbolize=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOSYM
 
+// Run the same test as above, but using the __hwasan_record_frame_record libcall.
+// The output should be exactly the same.
+// RUN: %clang_hwasan -g %s -o %t.libcall -mllvm -hwasan-record-stack-history-with-calls=1 && not %env_hwasan_opts=symbolize=0 %run %t.libcall 2>&1 | FileCheck %s --check-prefix=NOSYM
+
 // REQUIRES: stable-runtime
 
 // Stack histories currently are not recorded on x86.
diff --git a/compiler-rt/test/hwasan/TestCases/stack-uas.c b/compiler-rt/test/hwasan/TestCases/stack-uas.c
--- a/compiler-rt/test/hwasan/TestCases/stack-uas.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-uas.c
@@ -8,6 +8,10 @@
 
 // RUN: %clang_hwasan -mllvm -hwasan-use-after-scope -g %s -o %t && not %run %t 2>&1 | FileCheck %s
 
+// Run the same test as above, but using the __hwasan_record_frame_record libcall.
+// The output should be exactly the same.
+// RUN: %clang_hwasan -mllvm -hwasan-use-after-scope -mllvm -hwasan-record-stack-history-with-calls=1 -g %s -o %t.libcall && not %env_hwasan_opts=symbolize=0 %run %t.libcall 2>&1 | FileCheck %s --check-prefix=NOSYM
+
 // REQUIRES: stable-runtime
 
 // Stack histories currently are not recorded on x86.
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -185,6 +185,13 @@
                          cl::desc("Record stack frames with tagged allocations "
                                   "in a thread-local ring buffer"),
                          cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClRecordStackHistoryWithCalls(
+    "hwasan-record-stack-history-with-calls",
+    cl::desc("If recording stack frames with tagged allocations, record them "
+             "with runtime library calls"),
+    cl::Hidden, cl::init(false));
+
 static cl::opt<bool>
     ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                               cl::desc("instrument memory intrinsics"),
@@ -313,6 +320,7 @@
 
   Value *getPC(IRBuilder<> &IRB);
   Value *getSP(IRBuilder<> &IRB);
+  Value *getFrameRecordInfo(IRBuilder<> &IRB);
 
   void instrumentPersonalityFunctions();
 
@@ -378,6 +386,7 @@
 
   FunctionCallee HwasanTagMemoryFunc;
   FunctionCallee HwasanGenerateTagFunc;
+  FunctionCallee HwasanRecordFrameRecordFunc;
 
   Constant *ShadowGlobal;
 
@@ -629,6 +638,9 @@
   HwasanGenerateTagFunc =
       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
 
+  HwasanRecordFrameRecordFunc = M.getOrInsertFunction(
+      "__hwasan_record_frame_record", IRB.getVoidTy(), Int64Ty);
+
   ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
                                      ArrayType::get(IRB.getInt8Ty(), 0));
 
@@ -1132,6 +1144,23 @@
   return CachedSP;
 }
 
+// Returns the frame record value for the current function: the PC OR'ed with
+// the SP shifted left by 44 bits. This is the value passed to the libcall.
+Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
+  // Prepare ring buffer data.
+  Value *PC = getPC(IRB);
+  Value *SP = getSP(IRB);
+
+  // Mix SP and PC.
+  // Assumptions:
+  // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
+  // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
+  // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
+  // 0xSSSSPPPPPPPPPPPP
+  SP = IRB.CreateShl(SP, 44);
+  return IRB.CreateOr(PC, SP);
+}
+
 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
   if (!Mapping.InTls)
     ShadowBase = getShadowNonTls(IRB);
@@ -1141,50 +1170,75 @@
   if (!WithFrameRecord && ShadowBase)
     return;
 
-  Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
-  assert(SlotPtr);
-
-  Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
-  // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
-  Value *ThreadLongMaybeUntagged =
-      TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
-
+  Value *ThreadLongMaybeUntagged = nullptr;
   if (WithFrameRecord) {
-    StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
-
-    // Prepare ring buffer data.
-    Value *PC = getPC(IRB);
-    Value *SP = getSP(IRB);
-
-    // Mix SP and PC.
-    // Assumptions:
-    // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
-    // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
-    // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
-    // 0xSSSSPPPPPPPPPPPP
-    SP = IRB.CreateShl(SP, 44);
-
-    // Store data to ring buffer.
-    Value *RecordPtr =
-        IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
-    IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
-
-    // Update the ring buffer. Top byte of ThreadLong defines the size of the
-    // buffer in pages, it must be a power of two, and the start of the buffer
-    // must be aligned by twice that much. Therefore wrap around of the ring
-    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
-    // The use of AShr instead of LShr is due to
-    // https://bugs.llvm.org/show_bug.cgi?id=39030
-    // Runtime library makes sure not to use the highest bit.
-    Value *WrapMask = IRB.CreateXor(
-        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
-        ConstantInt::get(IntptrTy, (uint64_t)-1));
-    Value *ThreadLongNew = IRB.CreateAnd(
-        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
-    IRB.CreateStore(ThreadLongNew, SlotPtr);
+    if (ClRecordStackHistoryWithCalls) {
+      // Emit a runtime call into hwasan rather than emitting instructions for
+      // recording stack history.
+      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
+      IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
+    } else {
+      Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+      assert(SlotPtr);
+
+      Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
+      // Extract the address field from ThreadLong. Unnecessary on AArch64 with
+      // TBI.
+      ThreadLongMaybeUntagged =
+          TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+
+      StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
+
+      // Prepare ring buffer data.
+      // TODO: The SP and PC mixing is done here rather than calling
+      // `getFrameRecordInfo` to preserve existing codegen for code not emitting
+      // libcalls. Once this change lands, we could come back and refactor this
+      // to use `getFrameRecordInfo` and update codegen tests.
+      Value *PC = getPC(IRB);
+      Value *SP = getSP(IRB);
+
+      // Mix SP and PC.
+      // Assumptions:
+      // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
+      // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
+      // We only really need ~20 lower non-zero bits (SSSS), so we mix like
+      // this:
+      // 0xSSSSPPPPPPPPPPPP
+      SP = IRB.CreateShl(SP, 44);
+
+      // Store data to ring buffer.
+      Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged,
+                                            IntptrTy->getPointerTo(0));
+      IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
+
+      // Update the ring buffer. Top byte of ThreadLong defines the size of the
+      // buffer in pages, it must be a power of two, and the start of the buffer
+      // must be aligned by twice that much. Therefore wrap around of the ring
+      // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
+      // The use of AShr instead of LShr is due to
+      // https://bugs.llvm.org/show_bug.cgi?id=39030
+      // Runtime library makes sure not to use the highest bit.
+      Value *WrapMask = IRB.CreateXor(
+          IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
+          ConstantInt::get(IntptrTy, (uint64_t)-1));
+      Value *ThreadLongNew = IRB.CreateAnd(
+          IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
+      IRB.CreateStore(ThreadLongNew, SlotPtr);
+    }
   }
 
   if (!ShadowBase) {
+    if (!ThreadLongMaybeUntagged) {
+      Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+      assert(SlotPtr);
+
+      Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
+      // Extract the address field from ThreadLong. Unnecessary on AArch64 with
+      // TBI.
+      ThreadLongMaybeUntagged =
+          TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+    }
+
     // Get shadow base address by aligning RecordPtr up.
     // Note: this is not correct if the pointer is already aligned.
     // Runtime library will make sure this never happens.
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll
--- a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll
@@ -1,9 +1,9 @@
 ; Test -hwasan-with-ifunc flag.
 ;
 ; RUN: opt -passes=hwasan -S < %s | \
-; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOGLOBAL,CHECK-TLS-SLOT,CHECK-HISTORY,CHECK-HISTORY-TLS-SLOT
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOGLOBAL,CHECK-TLS-SLOT,CHECK-HISTORY,CHECK-HISTORY-TLS-SLOT,CHECK-HISTORY-TLS
 ; RUN: opt -passes=hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=1 -hwasan-record-stack-history=1 < %s | \
-; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOGLOBAL,CHECK-TLS-SLOT,CHECK-HISTORY,CHECK-HISTORY-TLS-SLOT
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOGLOBAL,CHECK-TLS-SLOT,CHECK-HISTORY,CHECK-HISTORY-TLS-SLOT,CHECK-HISTORY-TLS
 ; RUN: opt -passes=hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=1 -hwasan-record-stack-history=0 < %s | \
 ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOGLOBAL,CHECK-IFUNC,CHECK-NOHISTORY
 ; RUN: opt -passes=hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=0 < %s | \
@@ -11,7 +11,9 @@
 ; RUN: opt -passes=hwasan -S -hwasan-with-ifunc=1 -hwasan-with-tls=0 < %s | \
 ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC,CHECK-NOHISTORY
 ; RUN: opt -passes=hwasan -S -mtriple=aarch64-fuchsia < %s | \
-; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-ZERO-OFFSET,CHECK-SHORT-GRANULES,CHECK-HISTORY,CHECK-HWASAN-TLS,CHECK-HISTORY-HWASAN-TLS
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-ZERO-OFFSET,CHECK-SHORT-GRANULES,CHECK-HISTORY,CHECK-HWASAN-TLS,CHECK-HISTORY-HWASAN-TLS,CHECK-HISTORY-TLS
+; RUN: opt -passes=hwasan -S -mtriple=aarch64-fuchsia -hwasan-record-stack-history-with-calls=1 < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-HISTORY,CHECK-HISTORY-LIBCALL
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android22"
@@ -66,20 +68,28 @@
 
 ; CHECK-NOHISTORY-NOT: store i64
 
-; CHECK-HISTORY: call i64 @llvm.read_register.i64(metadata [[MD:![0-9]*]])
-; CHECK-HISTORY: %[[PTR:[^ ]*]] = inttoptr i64 %[[D]] to i64*
-; CHECK-HISTORY: store i64 %{{.*}}, i64* %[[PTR]]
-; CHECK-HISTORY: %[[D1:[^ ]*]] = ashr i64 %[[D]], 56
-; CHECK-HISTORY: %[[D2:[^ ]*]] = shl nuw nsw i64 %[[D1]], 12
-; CHECK-HISTORY: %[[D3:[^ ]*]] = xor i64 %[[D2]], -1
-; CHECK-HISTORY: %[[D4:[^ ]*]] = add i64 %[[D]], 8
-; CHECK-HISTORY: %[[D5:[^ ]*]] = and i64 %[[D4]], %[[D3]]
+; When watching stack history, all code paths attempt to get PC and SP and mix them together.
+; CHECK-HISTORY: %[[PC:[^ ]*]] = call i64 @llvm.read_register.i64(metadata [[MD:![0-9]*]])
+; CHECK-HISTORY: %[[SP0:[^ ]*]] = call i8* @llvm.frameaddress.p0i8(i32 0)
+; CHECK-HISTORY: %[[SP1:[^ ]*]] = ptrtoint i8* %[[SP0]] to i64
+; CHECK-HISTORY: %[[SP2:[^ ]*]] = shl i64 %[[SP1]], 44
+
+; CHECK-HISTORY-TLS: %[[PTR:[^ ]*]] = inttoptr i64 %[[D]] to i64*
+; CHECK-HISTORY: %[[MIX:[^ ]*]] = or i64 %[[PC]], %[[SP2]]
+; CHECK-HISTORY-TLS: store i64 %[[MIX]], i64* %[[PTR]]
+; CHECK-HISTORY-TLS: %[[D1:[^ ]*]] = ashr i64 %[[D]], 56
+; CHECK-HISTORY-TLS: %[[D2:[^ ]*]] = shl nuw nsw i64 %[[D1]], 12
+; CHECK-HISTORY-TLS: %[[D3:[^ ]*]] = xor i64 %[[D2]], -1
+; CHECK-HISTORY-TLS: %[[D4:[^ ]*]] = add i64 %[[D]], 8
+; CHECK-HISTORY-TLS: %[[D5:[^ ]*]] = and i64 %[[D4]], %[[D3]]
 ; CHECK-HISTORY-TLS-SLOT: store i64 %[[D5]], i64* %[[C]]
 ; CHECK-HISTORY-HWASAN-TLS: store i64 %[[D5]], i64* @__hwasan_tls
+; CHECK-HISTORY-LIBCALL: call void @__hwasan_record_frame_record(i64 %[[MIX]])
 
 ; CHECK-TLS: %[[F:[^ ]*]] = or i64 %[[D]], 4294967295
 ; CHECK-TLS: = add i64 %[[F]], 1
 
+; CHECK-HISTORY-LIBCALL: %[[E:hwasan.stack.base.tag]] = xor
 ; CHECK-HISTORY: = xor i64 %[[E]], 0
 
 ; CHECK-NOHISTORY-NOT: store i64