[AArch64] Stack tagging: make sure a tagged alloca is untagged before the
function exits.

For an alloca with exactly one lifetime.start / lifetime.end pair, the untag
stays at the lifetime end only when that end post-dominates the start, or when
it dominates every exit in RetVec that is reachable from the start. Otherwise
the untag is emitted before each reachable exit and the lifetime.end call is
erased, because the untag may now sit outside the lifetime interval.

The dominator and post-dominator trees are taken from the pass manager when
available and are otherwise built locally; the post-dominator tree is not
built for optnone functions.

Index: llvm/lib/Target/AArch64/AArch64StackTagging.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -491,6 +492,15 @@
   Info.AI = NewAI;
 }
 
+// Helper function to check for post-dominance.
+static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
+                          const IntrinsicInst *B) {
+  const BasicBlock *ABB = A->getParent();
+  const BasicBlock *BBB = B->getParent();
+
+  return ABB == BBB || PDT->dominates(ABB, BBB);
+}
+
 // FIXME: check for MTE extension
 bool AArch64StackTagging::runOnFunction(Function &Fn) {
   if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
@@ -565,23 +575,31 @@
   if (NumInterestingAllocas == 0)
     return true;
 
+  std::unique_ptr<DominatorTree> DeleteDT;
+  DominatorTree *DT = nullptr;
+  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+    DT = &P->getDomTree();
+
+  if (DT == nullptr && (NumInterestingAllocas > 1 ||
+                        !F->hasFnAttribute(Attribute::OptimizeNone))) {
+    DeleteDT = std::make_unique<DominatorTree>(*F);
+    DT = DeleteDT.get();
+  }
+
+  std::unique_ptr<PostDominatorTree> DeletePDT;
+  PostDominatorTree *PDT = nullptr;
+  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
+    PDT = &P->getPostDomTree();
+
+  if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
+    DeletePDT = std::make_unique<PostDominatorTree>(*F);
+    PDT = DeletePDT.get();
+  }
+
   SetTagFunc =
       Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
 
-  // Compute DT only if the function has the attribute, there are more than 1
-  // interesting allocas, and it is not available for free.
-  Instruction *Base;
-  if (NumInterestingAllocas > 1) {
-    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-    if (DTWP) {
-      Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree());
-    } else {
-      DominatorTree DT(*F);
-      Base = insertBaseTaggedPointer(Allocas, &DT);
-    }
-  } else {
-    Base = insertBaseTaggedPointer(Allocas, nullptr);
-  }
+  Instruction *Base = insertBaseTaggedPointer(Allocas, DT);
 
   for (auto &I : Allocas) {
     const AllocaInfo &Info = I.second;
@@ -604,11 +622,37 @@
     if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
         Info.LifetimeEnd.size() == 1) {
       IntrinsicInst *Start = Info.LifetimeStart[0];
+      IntrinsicInst *End = Info.LifetimeEnd[0];
       uint64_t Size =
           dyn_cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
       Size = alignTo(Size, kTagGranuleSize);
       tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
-      untagAlloca(AI, Info.LifetimeEnd[0], Size);
+      // We need to ensure that if we tag some object, we certainly untag it
+      // before the function exits.
+      if (PDT != nullptr && postDominates(PDT, End, Start)) {
+        untagAlloca(AI, End, Size);
+      } else {
+        SmallVector<Instruction *, 8> ReachableRetVec;
+        unsigned NumCoveredExits = 0;
+        for (auto &RI : RetVec) {
+          if (!isPotentiallyReachable(Start, RI, nullptr, DT))
+            continue;
+          ReachableRetVec.push_back(RI);
+          if (DT != nullptr && DT->dominates(End, RI))
+            ++NumCoveredExits;
+        }
+        // If there's a mix of covered and non-covered exits, just put the untag
+        // on exits, so we avoid the redundancy of untagging twice.
+        if (NumCoveredExits == ReachableRetVec.size()) {
+          untagAlloca(AI, End, Size);
+        } else {
+          for (auto &RI : ReachableRetVec)
+            untagAlloca(AI, RI, Size);
+          // We may have inserted untag outside of the lifetime interval.
+          // Remove the lifetime end call for this alloca.
+          End->eraseFromParent();
+        }
+      }
     } else {
       uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
       Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
Index: llvm/test/CodeGen/AArch64/stack-tagging-ex-1.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/stack-tagging-ex-1.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -stack-tagging %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-unknown-eabi"
+
+define void @f() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+start:
+; CHECK-LABEL: start:
+  %a = alloca i8, i32 48, align 8
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %a) #2
+; CHECK: call void @llvm.aarch64.settag(i8* %a.tag, i64 48)
+  %b = alloca i8, i32 48, align 8
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %b) #2
+; CHECK: call void @llvm.aarch64.settag(i8* %b.tag, i64 48)
+  invoke void @g (i8 * nonnull %a, i8 * nonnull %b) to label %next0 unwind label %lpad0
+; CHECK-NOT: settag
+
+next0:
+; CHECK-LABEL: next0:
+  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %a)
+  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %b)
+  br label %exit
+; CHECK-NOT: settag
+
+lpad0:
+; CHECK-LABEL: lpad0:
+  %pad0v = landingpad { i8*, i32 } catch i8* null
+  %v = extractvalue { i8*, i32 } %pad0v, 0
+  %x = call i8* @__cxa_begin_catch(i8* %v) #2
+  invoke void @__cxa_end_catch() to label %next1 unwind label %lpad1
+; CHECK-NOT: settag
+
+next1:
+; CHECK-LABEL: next1:
+  br label %exit
+; CHECK-NOT: settag
+
+lpad1:
+; CHECK-LABEL: lpad1:
+; CHECK-DAG: call void @llvm.aarch64.settag(i8* %a, i64 48)
+; CHECK-DAG: call void @llvm.aarch64.settag(i8* %b, i64 48)
+  %pad1v = landingpad { i8*, i32 } cleanup
+  resume { i8*, i32 } %pad1v
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK-DAG: call void @llvm.aarch64.settag(i8* %a, i64 48)
+; CHECK-DAG: call void @llvm.aarch64.settag(i8* %b, i64 48)
+  ret void
+; CHECK: ret void
+}
+
+declare void @g(i8 *, i8 *) #0
+
+declare dso_local i32 @__gxx_personality_v0(...)
+
+declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr
+
+declare dso_local void @__cxa_end_catch() local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind willreturn }
+attributes #2 = { nounwind }
Index: llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
@@ -0,0 +1,183 @@
+; clang -target aarch64-eabi -O2 -march=armv8.5-a+memtag -fsanitize=memtag -S -emit-llvm test.cc
+; void bar() {
+;   throw 42;
+; }
+
+; void foo() {
+;   int A0;
+;   __asm volatile("" : : "r"(&A0));
+
+;   try {
+;     bar();
+;   } catch (int exc) {
+;   }
+
+;   throw 15532;
+; }
+
+; int main() {
+;   try {
+;     foo();
+;   } catch (int exc) {
+;   }
+
+;   return 0;
+; }
+
+; RUN: opt -S -stack-tagging %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-unknown-eabi"
+
+@_ZTIi = external dso_local constant i8*
+
+; Function Attrs: noreturn sanitize_memtag
+define dso_local void @_Z3barv() local_unnamed_addr #0 {
+entry:
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4) #4
+  %0 = bitcast i8* %exception to i32*
+  store i32 42, i32* %0, align 16, !tbaa !2
+  tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+  unreachable
+}
+
+declare dso_local i8* @__cxa_allocate_exception(i64) local_unnamed_addr
+
+declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
+
+; Function Attrs: noreturn sanitize_memtag
+define dso_local void @_Z3foov() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %A0 = alloca i32, align 4
+  %0 = bitcast i32* %A0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
+  call void asm sideeffect "", "r"(i32* nonnull %A0) #4, !srcloc !6
+  invoke void @_Z3barv()
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %1 = landingpad { i8*, i32 }
+          cleanup
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %2 = extractvalue { i8*, i32 } %1, 1
+  %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4
+  %matches = icmp eq i32 %2, %3
+  br i1 %matches, label %catch, label %ehcleanup
+
+catch:                                            ; preds = %lpad
+  %4 = extractvalue { i8*, i32 } %1, 0
+  %5 = call i8* @__cxa_begin_catch(i8* %4) #4
+  call void @__cxa_end_catch() #4
+  br label %try.cont
+
+try.cont:                                         ; preds = %entry, %catch
+  %exception = call i8* @__cxa_allocate_exception(i64 4) #4
+  %6 = bitcast i8* %exception to i32*
+  store i32 15532, i32* %6, align 16, !tbaa !2
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+  unreachable
+
+ehcleanup:                                        ; preds = %lpad
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
+  resume { i8*, i32 } %1
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare dso_local i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr
+
+declare dso_local void @__cxa_end_catch() local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: norecurse sanitize_memtag
+define dso_local i32 @main() local_unnamed_addr #3 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; CHECK-LABEL: entry:
+  %A0.i = alloca i32, align 4
+  %0 = bitcast i32* %A0.i to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
+  call void asm sideeffect "", "r"(i32* nonnull %A0.i) #4, !srcloc !6
+; CHECK: call void @llvm.aarch64.settag(i8* %1, i64 16)
+; CHECK-NEXT: call void asm sideeffect
+  %exception.i6 = call i8* @__cxa_allocate_exception(i64 4) #4
+  %1 = bitcast i8* %exception.i6 to i32*
+  store i32 42, i32* %1, align 16, !tbaa !2
+  invoke void @__cxa_throw(i8* %exception.i6, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+          to label %.noexc7 unwind label %lpad.i
+
+.noexc7:                                          ; preds = %entry
+  unreachable
+
+lpad.i:                                           ; preds = %entry
+  %2 = landingpad { i8*, i32 }
+          cleanup
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %3 = extractvalue { i8*, i32 } %2, 1
+  %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4
+  %matches.i = icmp eq i32 %3, %4
+  br i1 %matches.i, label %catch.i, label %ehcleanup.i
+
+catch.i:                                          ; preds = %lpad.i
+  %5 = extractvalue { i8*, i32 } %2, 0
+  %6 = call i8* @__cxa_begin_catch(i8* %5) #4
+  call void @__cxa_end_catch() #4
+  %exception.i = call i8* @__cxa_allocate_exception(i64 4) #4
+  %7 = bitcast i8* %exception.i to i32*
+  store i32 15532, i32* %7, align 16, !tbaa !2
+  invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+          to label %.noexc unwind label %lpad
+
+.noexc:                                           ; preds = %catch.i
+  unreachable
+
+ehcleanup.i:                                      ; preds = %lpad.i
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
+  br label %lpad.body
+
+lpad:                                             ; preds = %catch.i
+  %8 = landingpad { i8*, i32 }
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %.pre = extractvalue { i8*, i32 } %8, 1
+  br label %lpad.body
+
+lpad.body:                                        ; preds = %ehcleanup.i, %lpad
+  %.pre-phi = phi i32 [ %3, %ehcleanup.i ], [ %.pre, %lpad ]
+  %eh.lpad-body = phi { i8*, i32 } [ %2, %ehcleanup.i ], [ %8, %lpad ]
+  %matches = icmp eq i32 %.pre-phi, %4
+  br i1 %matches, label %catch, label %eh.resume
+
+catch:                                            ; preds = %lpad.body
+  %9 = extractvalue { i8*, i32 } %eh.lpad-body, 0
+  %10 = call i8* @__cxa_begin_catch(i8* %9) #4
+  call void @__cxa_end_catch() #4
+  ret i32 0
+
+eh.resume:                                        ; preds = %lpad.body
+  resume { i8*, i32 } %eh.lpad-body
+}
+
+attributes #0 = { noreturn sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind willreturn }
+attributes #2 = { nounwind readnone }
+attributes #3 = { norecurse sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+attributes #5 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project.git c38188c5fe41751fda095edde1a878b2a051ae58)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{i32 70}
Index: llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/stack-tagging-untag-placement.ll
@@ -0,0 +1,82 @@
+;; RUN: opt -S -stack-tagging %s -o - | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-unknown-eabi"
+
+define void @f() local_unnamed_addr #0 {
+S0:
+; CHECK-LABEL: S0:
+; CHECK: %basetag = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %v = alloca i8, i32 48, align 8
+; CHECK: %v.tag = call i8* @llvm.aarch64.tagp.p0i8(i8* %v, i8* %basetag, i64 0)
+  %w = alloca i8, i32 48, align 16
+; CHECK: %w.tag = call i8* @llvm.aarch64.tagp.p0i8(i8* %w, i8* %basetag, i64 1)
+
+  %t0 = call i32 @g0() #1
+  %b0 = icmp eq i32 %t0, 0
+  br i1 %b0, label %S1, label %exit3
+
+S1:
+; CHECK-LABEL: S1:
+  call void @llvm.lifetime.start.p0i8(i64 48, i8 * nonnull %v) #1
+; CHECK: call void @llvm.aarch64.settag(i8* %v.tag, i64 48)
+  call void @llvm.lifetime.start.p0i8(i64 48, i8 * nonnull %w) #1
+; CHECK: call void @llvm.aarch64.settag(i8* %w.tag, i64 48)
+  %t1 = call i32 @g1(i8 * nonnull %v, i8 * nonnull %w) #1
+; CHECK: call i32 @g1
+; CHECK-NOT: settag{{.*}}%v
+; CHECK: call void @llvm.aarch64.settag(i8* %w, i64 48)
+; CHECK-NOT: settag{{.*}}%v
+  call void @llvm.lifetime.end.p0i8(i64 48, i8 * nonnull %w) #1
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %w.tag)
+  %b1 = icmp eq i32 %t1, 0
+  br i1 %b1, label %S2, label %S3
+; CHECK-NOT: settag
+
+S2:
+; CHECK-LABEL: S2:
+  call void @z0() #1
+  br label %exit1
+; CHECK-NOT: settag
+
+S3:
+; CHECK-LABEL: S3:
+  call void @llvm.lifetime.end.p0i8(i64 48, i8 * nonnull %v) #1
+  tail call void @z1() #1
+  br label %exit2
+; CHECK-NOT: settag
+
+exit1:
+; CHECK-LABEL: exit1:
+; CHECK: call void @llvm.aarch64.settag(i8* %v, i64 48)
+  ret void
+
+exit2:
+; CHECK-LABEL: exit2:
+; CHECK: call void @llvm.aarch64.settag(i8* %v, i64 48)
+  ret void
+
+exit3:
+; CHECK-LABEL: exit3:
+  call void @z2() #1
+; CHECK-NOT: settag
+  ret void
+; CHECK: ret void
+}
+
+declare i32 @g0() #0
+
+declare i32 @g1(i8 *, i8 *) #0
+
+declare void @z0() #0
+
+declare void @z1() #0
+
+declare void @z2() #0
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8 * nocapture) #1
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8 * nocapture) #1
+
+attributes #0 = { sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+