diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -56,6 +56,7 @@ createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &); FunctionPass *createAArch64PreLegalizeCombiner(); +FunctionPass *createAArch64StackTaggingPass(); void initializeAArch64A53Fix835769Pass(PassRegistry&); void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); @@ -78,6 +79,7 @@ void initializeFalkorHWPFFixPass(PassRegistry&); void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&); void initializeLDTLSCleanupPass(PassRegistry&); +void initializeAArch64StackTaggingPass(PassRegistry&); } // end namespace llvm #endif diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp @@ -0,0 +1,327 @@ +//===- AArch64StackTagging.cpp - Stack tagging in IR --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" +#include <cassert> +#include <iterator> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "stack-tagging" + +static constexpr unsigned kTagGranuleSize = 16; + +namespace { +class AArch64StackTagging : public FunctionPass { + struct AllocaInfo { + AllocaInst *AI; + SmallVector<IntrinsicInst *, 2> LifetimeStart; + SmallVector<IntrinsicInst *, 2> LifetimeEnd; + int Tag; + }; + +public: + static char ID; // Pass
ID, replacement for typeid + + AArch64StackTagging() : FunctionPass(ID) { + initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry()); + } + + uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const; + AllocaInst *findAllocaForLifetime(Value *V); + bool isInterestingAlloca(const AllocaInst &AI); + + void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr, + uint64_t Size); + void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size); + + Instruction * + insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas, + const DominatorTree *DT); + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { return "AArch64 Stack Tagging"; } + +private: + Function *F; + Function *SetTagFunc; + const DataLayout *DL; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } +}; + +} // end anonymous namespace + +char AArch64StackTagging::ID = 0; + +INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, + "AArch64 Stack Tagging", false, false) +INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, + "AArch64 Stack Tagging", false, false) + +FunctionPass *llvm::createAArch64StackTaggingPass() { + return new AArch64StackTagging(); +} + +uint64_t +AArch64StackTagging::getAllocaSizeInBytes(const AllocaInst &AI) const { + uint64_t ArraySize = 1; + if (AI.isArrayAllocation()) { + const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize()); + assert(CI && "non-constant array size"); + ArraySize = CI->getZExtValue(); + } + Type *Ty = AI.getAllocatedType(); + uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty); + return SizeInBytes * ArraySize; +} + +bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) { + bool IsInteresting = + AI.getAllocatedType()->isSized() && AI.isStaticAlloca() && + // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(AI) > 0 && + // inalloca allocas are not treated as static, and we don't want + // dynamic alloca instrumentation for them as well. + !AI.isUsedWithInAlloca() && + // swifterror allocas are register promoted by ISel + !AI.isSwiftError(); + return IsInteresting; +} + +AllocaInst *AArch64StackTagging::findAllocaForLifetime(Value *V) { + SmallVector<Value *, 2> Objs; + if (!getUnderlyingObjectsForCodeGen(V, Objs, F->getParent()->getDataLayout())) + return nullptr; + + if (Objs.size() != 1) + return nullptr; + + return dyn_cast<AllocaInst>(Objs[0]); +} + +void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore, + Value *Ptr, uint64_t Size) { + auto SetTagFunc = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag); + IRBuilder<> IRB(InsertBefore); + IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)}); +} + +void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore, + uint64_t Size) { + auto SetTagFunc = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag); + IRBuilder<> IRB(InsertBefore); + IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()), + ConstantInt::get(IRB.getInt64Ty(), Size)}); +} + +Instruction *AArch64StackTagging::insertBaseTaggedPointer( + const MapVector<AllocaInst *, AllocaInfo> &Allocas, const DominatorTree *DT) { + BasicBlock *PrologueBB = nullptr; + // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
+ for (auto &I : Allocas) { + if (I.second.Tag < 0) + continue; + if (!PrologueBB) { + PrologueBB = I.first->getParent(); + continue; + } + PrologueBB = + DT->findNearestCommonDominator(PrologueBB, I.first->getParent()); + } + assert(PrologueBB); + + IRBuilder<> IRB(&PrologueBB->front()); + Function *IRG_SP = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp); + Instruction *Base = + IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())}); + Base->setName("basetag"); + return Base; +} + +// FIXME: check for MTE extension + +bool AArch64StackTagging::runOnFunction(Function &Fn) { + if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag)) + return false; + + F = &Fn; + DL = &Fn.getParent()->getDataLayout(); + + unsigned MinAlignment = kTagGranuleSize; + + MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order + SmallVector<Instruction *, 8> RetVec; + DenseMap<Value *, AllocaInst *> AllocaForValue; + SmallVector<Instruction *, 4> UnrecognizedLifetimes; + + for (auto &BB : *F) { + for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) { + Instruction *I = &*IT; + if (auto *AI = dyn_cast<AllocaInst>(I)) { + Allocas[AI].AI = AI; + continue; + } + + auto *II = dyn_cast<IntrinsicInst>(I); + if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end)) { + AllocaInst *AI = + llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue); + if (!AI) { + UnrecognizedLifetimes.push_back(I); + continue; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Allocas[AI].LifetimeStart.push_back(II); + else + Allocas[AI].LifetimeEnd.push_back(II); + } + + if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I)) + RetVec.push_back(I); + } + } + + if (Allocas.empty()) + return false; + + int NextTag = 0; + int NumInterestingAllocas = 0; + for (auto &I : Allocas) { + AllocaInfo &Info = I.second; + assert(Info.AI); + + unsigned NewAlignment = std::max(Info.AI->getAlignment(), MinAlignment); + Info.AI->setAlignment(NewAlignment); + + if (!isInterestingAlloca(*Info.AI)) { + Info.Tag = -1; + } else { + 
NumInterestingAllocas++; + Info.Tag = NextTag; + NextTag = (NextTag + 1) % 16; + } + } + + if (NumInterestingAllocas == 0) + return true; + + SetTagFunc = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag); + + // Compute DT only if the function has the attribute, there are more than 1 + // interesting allocas, and it is not available for free. + Instruction *Base; + if (NumInterestingAllocas > 1) { + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + if (DTWP) { + Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree()); + } else { + DominatorTree DT(*F); + Base = insertBaseTaggedPointer(Allocas, &DT); + } + } else { + Base = insertBaseTaggedPointer(Allocas, nullptr); + } + + for (auto &I : Allocas) { + AllocaInst *AI = I.first; + AllocaInfo &Info = I.second; + if (Info.Tag < 0) + continue; + + // Replace alloca with tagp(alloca). + IRBuilder<> IRB(Info.AI->getNextNode()); + Function *TagP = Intrinsic::getDeclaration( + F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()}); + Instruction *TagPCall = + IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base, + ConstantInt::get(IRB.getInt64Ty(), Info.Tag)}); + if (Info.AI->hasName()) + TagPCall->setName(Info.AI->getName() + ".tag"); + Info.AI->replaceAllUsesWith(TagPCall); + TagPCall->setOperand(0, Info.AI); + + if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 && + Info.LifetimeEnd.size() == 1) { + IntrinsicInst *Start = Info.LifetimeStart[0]; + uint64_t Size = + dyn_cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue(); + Size = alignTo(Size, MinAlignment); + tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size); + untagAlloca(AI, Info.LifetimeEnd[0], Size); + } else { + uint64_t Size = alignTo(getAllocaSizeInBytes(*Info.AI), MinAlignment); + tagAlloca(AI, TagPCall->getNextNode(), + IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy()), Size); + for (auto &RI : RetVec) { + untagAlloca(AI, RI, Size); + } + // We may have inserted tag/untag outside of any
lifetime interval. + // Remove all lifetime intrinsics for this alloca. + for (auto &II : Info.LifetimeStart) + II->eraseFromParent(); + for (auto &II : Info.LifetimeEnd) + II->eraseFromParent(); + } + } + + // If we have instrumented at least one alloca, all unrecognized lifetime + // instrinsics have to go. + for (auto &I : UnrecognizedLifetimes) + I->eraseFromParent(); + + return true; +} diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -179,6 +179,7 @@ initializeFalkorMarkStridedAccessesLegacyPass(*PR); initializeLDTLSCleanupPass(*PR); initializeAArch64SpeculationHardeningPass(*PR); + initializeAArch64StackTaggingPass(*PR); } //===----------------------------------------------------------------------===// @@ -446,6 +447,8 @@ // invariant. addPass(createLICMPass()); } + + addPass(createAArch64StackTaggingPass()); } // Pass Pipeline Configuration diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -55,6 +55,7 @@ AArch64RegisterInfo.cpp AArch64SelectionDAGInfo.cpp AArch64SpeculationHardening.cpp + AArch64StackTagging.cpp AArch64StorePairSuppress.cpp AArch64Subtarget.cpp AArch64TargetMachine.cpp diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -25,6 +25,7 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: AArch64 Stack Tagging ; CHECK-NEXT: Rewrite Symbols ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -55,6 +55,7 @@ ; CHECK-NEXT: Interleaved Load Combine Pass ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Interleaved Access Pass +; CHECK-NEXT: AArch64 Stack Tagging ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Rewrite Symbols diff --git a/llvm/test/CodeGen/AArch64/stack-tagging.ll b/llvm/test/CodeGen/AArch64/stack-tagging.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/stack-tagging.ll @@ -0,0 +1,186 @@ +; RUN: opt < %s -stack-tagging -S -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android" + +declare void @use8(i8*) +declare void @use32(i32*) +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +define void @OneVar() sanitize_memtag { +entry: + %x = alloca i32, align 4 + call void @use32(i32* %x) + ret void +} + +; CHECK-LABEL: define void @OneVar( +; CHECK: [[BASE:%.*]] = call i8* @llvm.aarch64.irg.sp(i64 0) +; CHECK: [[X:%.*]] = alloca i32, align 16 +; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.p0i32(i32* [[X]], i8* [[BASE]], i64 0) +; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* +; CHECK: call void @llvm.aarch64.settag(i8* [[TX8]], i64 16) +; CHECK: call void @use32(i32* [[TX]]) +; CHECK: [[TX8_2:%.*]] = bitcast i32* %x to i8* +; CHECK: call void @llvm.aarch64.settag(i8* [[TX8_2]], i64 16) +; CHECK: ret void + + +define void @ManyVars() sanitize_memtag { +entry: + %x1 = alloca i32, align 
4 + %x2 = alloca i8, align 4 + %x3 = alloca i32, i32 11, align 4 + call void @use32(i32* %x1) + call void @use8(i8* %x2) + call void @use32(i32* %x3) + ret void +} + +; CHECK-LABEL: define void @ManyVars( +; CHECK: alloca i32, align 16 +; CHECK: call i32* @llvm.aarch64.tagp.p0i32(i32* {{.*}}, i64 0) +; CHECK: call void @llvm.aarch64.settag(i8* {{.*}}, i64 16) +; CHECK: alloca i8, align 16 +; CHECK: call i8* @llvm.aarch64.tagp.p0i8(i8* {{.*}}, i64 1) +; CHECK: call void @llvm.aarch64.settag(i8* {{.*}}, i64 16) +; CHECK: alloca i32, i32 11, align 16 +; CHECK: call i32* @llvm.aarch64.tagp.p0i32(i32* {{.*}}, i64 2) +; CHECK: call void @llvm.aarch64.settag(i8* {{.*}}, i64 48) + +; CHECK: call void @use32( +; CHECK: call void @use8( +; CHECK: call void @use32( + +; CHECK: call void @llvm.aarch64.settag(i8* %2, i64 16) +; CHECK: call void @llvm.aarch64.settag(i8* %x2, i64 16) +; CHECK: call void @llvm.aarch64.settag(i8* %3, i64 48) +; CHECK-NEXT: ret void + + +define void @Scope(i32 %b) sanitize_memtag { +entry: + %x = alloca i32, align 4 + %tobool = icmp eq i32 %b, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: + %0 = bitcast i32* %x to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + call void @use8(i8* %0) #3 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + br label %if.end + +if.end: + ret void +} + +; CHECK-LABEL: define void @Scope( +; CHECK: br i1 +; CHECK: call void @llvm.lifetime.start.p0i8( +; CHECK: call void @llvm.aarch64.settag( +; CHECK: call void @use8( +; CHECK: call void @llvm.aarch64.settag( +; CHECK: call void @llvm.lifetime.end.p0i8( +; CHECK: br label +; CHECK: ret void + + +; Spooked by the multiple lifetime ranges, StackTagging remove all of them and sets tags on entry and exit. 
+define void @BadScope(i32 %b) sanitize_memtag { +entry: + %x = alloca i32, align 4 + %tobool = icmp eq i32 %b, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: + %0 = bitcast i32* %x to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + call void @use8(i8* %0) #3 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + call void @use8(i8* %0) #3 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + br label %if.end + +if.end: + ret void +} + +; CHECK-LABEL: define void @BadScope( +; CHECK: call void @llvm.aarch64.settag(i8* %0, i64 16) +; CHECK: br i1 +; CHECK: call void @use8(i8* %1) +; CHECK-NEXT: call void @use8(i8* %1) +; CHECK: br label +; CHECK: call void @llvm.aarch64.settag(i8* %2, i64 16) +; CHECK-NEXT: ret void + +define void @DynamicAllocas(i32 %cnt) sanitize_memtag { +entry: + %x = alloca i32, i32 %cnt, align 4 + br label %l +l: + %y = alloca i32, align 4 + call void @use32(i32* %x) + call void @use32(i32* %y) + ret void +} + +; CHECK-LABEL: define void @DynamicAllocas( +; CHECK-NOT: @llvm.aarch64.irg.sp +; CHECK: %x = alloca i32, i32 %cnt, align 16 +; CHECK-NOT: @llvm.aarch64.irg.sp +; CHECK: alloca i32, align 16 +; CHECK-NOT: @llvm.aarch64.irg.sp +; CHECK: ret void + +; If we can't trace one of the lifetime markers to a single alloca, fall back +; to poisoning all allocas at the beginning of the function. +; Each alloca must be poisoned only once. 
+define void @UnrecognizedLifetime(i8 %v) sanitize_memtag { +entry: + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + %cx = bitcast i32* %x to i8* + %cy = bitcast i32* %y to i8* + %cz = bitcast i32* %z to i8* + %tobool = icmp eq i8 %v, 0 + %xy = select i1 %tobool, i32* %x, i32* %y + %cxcy = select i1 %tobool, i8* %cx, i8* %cy + br label %another_bb + +another_bb: + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %cz) + store i32 7, i32* %z + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %cz) + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %cz) + store i32 7, i32* %z + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %cz) + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %cxcy) + store i32 8, i32* %xy + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %cxcy) + ret void +} + +; CHECK-LABEL: define void @UnrecognizedLifetime( +; CHECK: call i8* @llvm.aarch64.irg.sp(i64 0) +; CHECK: alloca i32, align 16 +; CHECK: call i32* @llvm.aarch64.tagp.p0i32( +; CHECK: call void @llvm.aarch64.settag( +; CHECK: alloca i32, align 16 +; CHECK: call i32* @llvm.aarch64.tagp.p0i32( +; CHECK: call void @llvm.aarch64.settag( +; CHECK: alloca i32, align 16 +; CHECK: call i32* @llvm.aarch64.tagp.p0i32( +; CHECK: call void @llvm.aarch64.settag( +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: call void @llvm.aarch64.settag( +; CHECK: call void @llvm.aarch64.settag( +; CHECK: call void @llvm.aarch64.settag( +; CHECK: ret void