Index: llvm/trunk/lib/Target/AArch64/AArch64.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.h
+++ llvm/trunk/lib/Target/AArch64/AArch64.h
@@ -56,7 +56,7 @@
 createAArch64InstructionSelector(const AArch64TargetMachine &,
                                  AArch64Subtarget &, AArch64RegisterBankInfo &);
 FunctionPass *createAArch64PreLegalizeCombiner();
-FunctionPass *createAArch64StackTaggingPass();
+FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
Index: llvm/trunk/lib/Target/AArch64/AArch64StackTagging.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64StackTagging.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -55,9 +55,221 @@
 
 #define DEBUG_TYPE "stack-tagging"
 
+static cl::opt<bool> ClMergeInit(
+    "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+    cl::desc("merge stack variable initializers with tagging when possible"));
+
+static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
+                                     cl::init(40), cl::Hidden);
+
 static constexpr unsigned kTagGranuleSize = 16;
 
 namespace {
+
+// Gathers initializers (stores, constant memsets) of one allocation and emits
+// them fused with tagging: stgp for data, settag.zero / settag otherwise.
+class InitializerBuilder {
+  uint64_t Size;
+  const DataLayout *DL;
+  Value *BasePtr;
+  Function *SetTagFn;
+  Function *SetTagZeroFn;
+  Function *StgpFn;
+
+  // List of initializers sorted by start offset.
+  struct Range {
+    uint64_t Start, End;
+    Instruction *Inst;
+  };
+  SmallVector<Range, 4> Ranges;
+  // 8-aligned offset => 8-byte initializer
+  // Missing keys are zero initialized.
+  std::map<uint64_t, Value *> Out;
+
+public:
+  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
+                     Function *SetTagFn, Function *SetTagZeroFn,
+                     Function *StgpFn)
+      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
+        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}
+
+  // Records [Start, End) as written by Inst, keeping Ranges sorted by offset.
+  // Returns false and records nothing if it overlaps an existing range.
+  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
+    auto I = std::lower_bound(
+        Ranges.begin(), Ranges.end(), Start,
+        [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
+    if (I != Ranges.end() && End > I->Start) {
+      // Overlap - bail.
+      return false;
+    }
+    Ranges.insert(I, {Start, End, Inst});
+    return true;
+  }
+
+  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
+    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
+    if (!addRange(Offset, Offset + StoreSize, SI))
+      return false;
+    IRBuilder<> IRB(SI);
+    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
+    return true;
+  }
+
+  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
+    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+    if (!addRange(Offset, Offset + StoreSize, MSI))
+      return false;
+    IRBuilder<> IRB(MSI);
+    applyMemSet(IRB, Offset, Offset + StoreSize,
+                cast<ConstantInt>(MSI->getValue()));
+    return true;
+  }
+
+  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
+                   ConstantInt *V) {
+    // Out[] does not distinguish between zero and undef, and we already know
+    // that this memset does not overlap with any other initializer. Nothing to
+    // do for memset(0).
+    if (V->isZero())
+      return;
+    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
+      uint64_t Cst = 0x0101010101010101UL;
+      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
+      if (LowBits)
+        Cst = (Cst >> LowBits) << LowBits;
+      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
+      if (HighBits)
+        Cst = (Cst << HighBits) >> HighBits;
+      ConstantInt *C =
+          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());
+
+      Value *&CurrentV = Out[Offset];
+      if (!CurrentV) {
+        CurrentV = C;
+      } else {
+        CurrentV = IRB.CreateOr(CurrentV, C);
+      }
+    }
+  }
+
+  // Take a 64-bit slice of the value starting at the given offset (in bytes).
+  // Offset can be negative. Pad with zeroes on both sides when necessary.
+  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
+    if (Offset > 0) {
+      V = IRB.CreateLShr(V, Offset * 8);
+      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+    } else if (Offset < 0) {
+      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+      V = IRB.CreateShl(V, -Offset * 8);
+    } else {
+      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
+    }
+    return V;
+  }
+
+  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
+                  Value *StoredValue) {
+    StoredValue = flatten(IRB, StoredValue);
+    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
+      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
+      Value *&CurrentV = Out[Offset];
+      if (!CurrentV) {
+        CurrentV = V;
+      } else {
+        CurrentV = IRB.CreateOr(CurrentV, V);
+      }
+    }
+  }
+
+  // Emits the merged tagging/initialization calls at IRB's insertion point,
+  // then erases the instructions that were folded in.
+  void generate(IRBuilder<> &IRB) {
+    LLVM_DEBUG(dbgs() << "Combined initializer\n");
+    // No initializers => the entire allocation is undef.
+    if (Ranges.empty()) {
+      emitUndef(IRB, 0, Size);
+      return;
+    }
+
+    // Look through 8-byte initializer list 16 bytes at a time;
+    // If one of the two 8-byte halfs is non-zero non-undef, emit STGP.
+    // Otherwise, emit zeroes up to next available item.
+    uint64_t LastOffset = 0;
+    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
+      auto I1 = Out.find(Offset);
+      auto I2 = Out.find(Offset + 8);
+      if (I1 == Out.end() && I2 == Out.end())
+        continue;
+
+      if (Offset > LastOffset)
+        emitZeroes(IRB, LastOffset, Offset - LastOffset);
+
+      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
+                                      : I1->second;
+      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
+                                      : I2->second;
+      emitPair(IRB, Offset, Store1, Store2);
+      LastOffset = Offset + 16;
+    }
+
+    // memset(0) does not update Out[], therefore the tail can be either undef
+    // or zero.
+    if (LastOffset < Size)
+      emitZeroes(IRB, LastOffset, Size - LastOffset);
+
+    for (const auto &R : Ranges) {
+      R.Inst->eraseFromParent();
+    }
+  }
+
+  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
+    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
+                      << ") zero\n");
+    Value *Ptr = BasePtr;
+    if (Offset)
+      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+    IRB.CreateCall(SetTagZeroFn,
+                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+  }
+
+  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
+    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
+                      << ") undef\n");
+    Value *Ptr = BasePtr;
+    if (Offset)
+      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+  }
+
+  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
+    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n");
+    LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n");
+    Value *Ptr = BasePtr;
+    if (Offset)
+      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
+    IRB.CreateCall(StgpFn, {Ptr, A, B});
+  }
+
+  Value *flatten(IRBuilder<> &IRB, Value *V) {
+    if (V->getType()->isIntegerTy())
+      return V;
+    // vector of pointers -> vector of ints
+    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
+      LLVMContext &Ctx = IRB.getContext();
+      Type *EltTy = VecTy->getElementType();
+      if (EltTy->isPointerTy()) {
+        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
+        Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize),
+                                      VecTy->getNumElements());
+        V = IRB.CreatePointerCast(V, NewTy);
+      }
+    }
+    return IRB.CreateBitOrPointerCast(
+        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
+  }
+};
+
 class AArch64StackTagging : public FunctionPass {
   struct AllocaInfo {
     AllocaInst *AI;
@@ -67,10 +279,15 @@
     int Tag; // -1 for non-tagged allocations
   };
 
+  bool MergeInit;
+
 public:
   static char ID; // Pass ID, replacement for typeid
 
-  AArch64StackTagging() : FunctionPass(ID) {
+  AArch64StackTagging(bool MergeInit = true)
+      : FunctionPass(ID),
+        MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit
+                                                      : MergeInit) {
     initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
   }
 
@@ -81,6 +298,9 @@
                  uint64_t Size);
   void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);
 
+  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
+                                   uint64_t Size, InitializerBuilder &IB);
+
   Instruction *
   insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
                           const DominatorTree *DT);
@@ -92,9 +312,12 @@
   Function *F;
   Function *SetTagFunc;
   const DataLayout *DL;
+  AAResults *AA;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    if (MergeInit)
+      AU.addRequired<AAResultsWrapperPass>();
   }
 };
 
@@ -107,8 +330,72 @@
 INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                     false, false)
 
-FunctionPass *llvm::createAArch64StackTaggingPass() {
-  return new AArch64StackTagging();
+FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) {
+  return new AArch64StackTagging(MergeInit);
+}
+
+// Scans forward from StartInst, within its basic block and for at most
+// ClScanLimit non-debug instructions, feeding simple stores and constant
+// memsets at a constant offset from StartPtr into IB. Returns the last
+// instruction merged, or StartInst if there was none.
+Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
+                                                      Value *StartPtr,
+                                                      uint64_t Size,
+                                                      InitializerBuilder &IB) {
+  MemoryLocation AllocaLoc{StartPtr, Size};
+  Instruction *LastInst = StartInst;
+  BasicBlock::iterator BI(StartInst);
+
+  unsigned Count = 0;
+  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
+    if (!isa<DbgInfoIntrinsic>(*BI))
+      ++Count;
+
+    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
+      continue;
+
+    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+      // If the instruction is readnone, ignore it, otherwise bail out. We
+      // don't even allow readonly here because we don't want something like:
+      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+        break;
+      continue;
+    }
+
+    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+      if (!NextStore->isSimple())
+        break;
+
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!isPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
+                           *DL))
+        break;
+
+      if (!IB.addStore(Offset, NextStore, DL))
+        break;
+      LastInst = NextStore;
+    } else {
+      MemSetInst *MSI = cast<MemSetInst>(BI);
+
+      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+        break;
+
+      if (!isa<ConstantInt>(MSI->getValue()))
+        break;
+
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!isPointerOffset(StartPtr, MSI->getDest(), Offset, *DL))
+        break;
+
+      if (!IB.addMemSet(Offset, MSI))
+        break;
+      LastInst = MSI;
+    }
+  }
+  return LastInst;
 }
 
 bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
@@ -127,8 +414,23 @@
 
 void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                     Value *Ptr, uint64_t Size) {
+  auto SetTagZeroFunc =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
+  auto StgpFunc =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);
+
+  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
+  bool LittleEndian =
+      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
+  // Current implementation of initializer merging assumes little endianness.
+  if (MergeInit && !F->hasOptNone() && LittleEndian) {
+    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
+                      << ", size = " << Size << "\n");
+    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
+  }
+
   IRBuilder<> IRB(InsertBefore);
-  IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+  IB.generate(IRB);
 }
 
 void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
@@ -205,6 +507,8 @@
 
   F = &Fn;
   DL = &Fn.getParent()->getDataLayout();
+  if (MergeInit)
+    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
   SmallVector<Instruction *, 8> RetVec;
Index: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -448,7 +448,8 @@
     addPass(createLICMPass());
   }
 
-  addPass(createAArch64StackTaggingPass());
+  addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() !=
+                                        CodeGenOpt::None));
 }
 
 // Pass Pipeline Configuration
Index: llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
+++ llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
@@ -55,6 +55,8 @@
 ; CHECK-NEXT: Interleaved Load Combine Pass
 ; CHECK-NEXT: Dominator Tree Construction
 ; CHECK-NEXT: Interleaved Access Pass
+; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT: Function Alias Analysis Results
 ; CHECK-NEXT: AArch64 Stack Tagging
 ; CHECK-NEXT: Natural Loop Information
 ; CHECK-NEXT: CodeGen Prepare
Index: llvm/trunk/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
+++ llvm/trunk/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
@@ -0,0 +1,308 @@
+; RUN: opt < %s -stack-tagging -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+declare void @use(i8*)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+
+define void @OneVarNoInit() sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @OneVarNoInit(
+; CHECK-DAG: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
+; CHECK-DAG: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp.{{.*}}({ i32, [12 x i8] }* [[X]], {{.*}}, i64 0)
+; CHECK-DAG: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
+; CHECK-DAG: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
+; CHECK-DAG: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TX8]])
+; CHECK-DAG: call void @llvm.aarch64.settag(i8* [[TX8]], i64 16)
+; CHECK-DAG: call void @use(i8* nonnull [[TX8]])
+; CHECK-DAG: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TX8]])
+
+define void @OneVarInitConst() sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  store i32 42, i32* %x, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @OneVarInitConst(
+; CHECK: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp
+; CHECK: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
+; CHECK-NOT: aarch64.settag
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0)
+; Untagging before lifetime.end:
+; CHECK: call void @llvm.aarch64.settag(
+; CHECK-NOT: aarch64.settag
+; CHECK: ret void
+
+define void @ArrayInitConst() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 16, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
+  store i32 42, i32* %x, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @ArrayInitConst(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0)
+; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16
+; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48)
+; CHECK: ret void
+
+define void @ArrayInitConst2() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 16, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
+  store i32 42, i32* %x, align 4
+  %1 = getelementptr i32, i32* %x, i32 1
+  store i32 43, i32* %1, align 4
+  %2 = getelementptr i32, i32* %x, i32 2
+  %3 = bitcast i32* %2 to i64*
+  store i64 -1, i64* %3, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @ArrayInitConst2(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 184683593770, i64 -1)
+; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16
+; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48)
+; CHECK: ret void
+
+define void @ArrayInitConstSplit() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 16, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
+  %1 = getelementptr i32, i32* %x, i32 1
+  %2 = bitcast i32* %1 to i64*
+  store i64 -1, i64* %2, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @ArrayInitConstSplit(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 -4294967296, i64 4294967295)
+; CHECK: ret void
+
+define void @ArrayInitConstWithHoles() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 32, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %0)
+  %1 = getelementptr i32, i32* %x, i32 5
+  store i32 42, i32* %1, align 4
+  %2 = getelementptr i32, i32* %x, i32 14
+  store i32 43, i32* %2, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @ArrayInitConstWithHoles(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 16)
+; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8_16]], i64 180388626432, i64 0)
+; CHECK: [[TX8_32:%.*]] = getelementptr i8, i8* [[TX8]], i32 32
+; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_32]], i64 16)
+; CHECK: [[TX8_48:%.*]] = getelementptr i8, i8* [[TX8]], i32 48
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8_48]], i64 0, i64 43)
+; CHECK: [[TX8_64:%.*]] = getelementptr i8, i8* [[TX8]], i32 64
+; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_64]], i64 64)
+; CHECK: ret void
+
+define void @InitNonConst(i32 %v) sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  store i32 %v, i32* %x, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @InitNonConst(
+; CHECK: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp
+; CHECK: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
+; CHECK: [[V:%.*]] = zext i32 %v to i64
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[V]], i64 0)
+; CHECK: ret void
+
+define void @InitNonConst2(i32 %v, i32 %w) sanitize_memtag {
+entry:
+  %x = alloca i32, i32 4, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
+  store i32 %v, i32* %x, align 4
+  %1 = getelementptr i32, i32* %x, i32 1
+  store i32 %w, i32* %1, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @InitNonConst2(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: [[V:%.*]] = zext i32 %v to i64
+; CHECK: [[W:%.*]] = zext i32 %w to i64
+; CHECK: [[WS:%.*]] = shl i64 [[W]], 32
+; CHECK: [[VW:%.*]] = or i64 [[V]], [[WS]]
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[VW]], i64 0)
+; CHECK: ret void
+
+define void @InitVector() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 4, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
+  %1 = bitcast i32* %x to <2 x i32>*
+  store <2 x i32> <i32 1, i32 2>, <2 x i32>* %1, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @InitVector(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 0)
+; CHECK: ret void
+
+define void @InitVectorPtr(i32* %p) sanitize_memtag {
+entry:
+  %s = alloca <4 x i32*>, align 8
+  %v0 = insertelement <4 x i32*> undef, i32* %p, i32 0
+  %v1 = shufflevector <4 x i32*> %v0, <4 x i32*> undef, <4 x i32> zeroinitializer
+  store <4 x i32*> %v1, <4 x i32*>* %s
+  %0 = bitcast <4 x i32*>* %s to i8*
+  call void @use(i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @InitVectorPtr(
+; CHECK: call <4 x i32*>* @llvm.aarch64.tagp
+; CHECK: [[V1:%.*]] = shufflevector
+; CHECK: [[V2:%.*]] = ptrtoint <4 x i32*> [[V1]] to <4 x i64>
+; CHECK: [[V3:%.*]] = bitcast <4 x i64> [[V2]] to i256
+; CHECK: [[A1:%.*]] = trunc i256 [[V3]] to i64
+; CHECK: [[A2_:%.*]] = lshr i256 [[V3]], 64
+; CHECK: [[A2:%.*]] = trunc i256 [[A2_]] to i64
+; CHECK: [[A3_:%.*]] = lshr i256 [[V3]], 128
+; CHECK: [[A3:%.*]] = trunc i256 [[A3_]] to i64
+; CHECK: [[A4_:%.*]] = lshr i256 [[V3]], 192
+; CHECK: [[A4:%.*]] = trunc i256 [[A4_]] to i64
+; CHECK: call void @llvm.aarch64.stgp({{.*}}, i64 [[A1]], i64 [[A2]])
+; CHECK: call void @llvm.aarch64.stgp({{.*}}, i64 [[A3]], i64 [[A4]])
+; CHECK: ret void
+
+define void @InitVectorSplit() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 4, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
+  %1 = getelementptr i32, i32* %x, i32 1
+  %2 = bitcast i32* %1 to <2 x i32>*
+  store <2 x i32> <i32 1, i32 2>, <2 x i32>* %2, align 4
+  call void @use(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @InitVectorSplit(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 shl (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32), i64 lshr (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32))
+; CHECK: ret void
+
+define void @MemSetZero() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 8, align 16
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 0, i64 32, i1 false)
+  call void @use(i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @MemSetZero(
+; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
+; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 32)
+; CHECK: ret void
+
+
+define void @MemSetNonZero() sanitize_memtag {
+entry:
+  %x = alloca i32, i32 8, align 16
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 42, i64 32, i1 false)
+  call void @use(i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @MemSetNonZero(
+; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
+; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
+; CHECK: ret void
+
+
+define void @MemSetNonZero2() sanitize_memtag {
+entry:
+  %x = alloca [32 x i8], align 16
+  %0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2
+  call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 28, i1 false)
+  call void @use(i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @MemSetNonZero2(
+; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199209472, i64 3038287259199220266)
+; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 46360584399402)
+; CHECK: ret void
+
+define void @MemSetNonZero3() sanitize_memtag {
+entry:
+  %x = alloca [32 x i8], align 16
+  %0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2
+  call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 4, i1 false)
+  %1 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 24
+  call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 42, i64 8, i1 false)
+  call void @use(i8* nonnull %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @MemSetNonZero3(
+; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 46360584388608, i64 0)
+; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 0, i64 3038287259199220266)
+; CHECK: ret void