diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -31,12 +31,15 @@
   MaybeAlign Alignment;
   // The mask Value, if we're looking at a masked load/store.
   Value *MaybeMask;
+  // The EVL Value, if we're looking at a vp intrinsic.
+  Value *MaybeEVL;
 
   InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
                            class Type *OpType, MaybeAlign Alignment,
-                           Value *MaybeMask = nullptr)
+                           Value *MaybeMask = nullptr,
+                           Value *MaybeEVL = nullptr)
       : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
-        MaybeMask(MaybeMask) {
+        MaybeMask(MaybeMask), MaybeEVL(MaybeEVL) {
     const DataLayout &DL = I->getModule()->getDataLayout();
     TypeStoreSize = DL.getTypeStoreSizeInBits(OpType);
     PtrUse = &I->getOperandUse(OperandNo);
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -494,6 +494,18 @@
     Instruction *InsertBefore,
     std::function<void(IRBuilderBase&, Value*)> Func);
 
+/// Utility function for performing a given action on each lane of a vector
+/// with \p EVL effective length. \p EVL is assumed > 0; callers must guard the
+/// zero case themselves. To simplify porting legacy code, this defaults to
+/// unrolling the implied loop when \p EVL is a constant, but this is not part
+/// of the contract of this routine and is expected to change in the future.
+/// The callback takes as arguments an IRBuilder whose insert point is
+/// correctly set for instantiating the given index, and a value which is (at
+/// runtime) the index to access. This index *may* be a constant.
+void SplitBlockAndInsertForEachLane(
+    Value *EVL, Instruction *InsertBefore,
+    std::function<void(IRBuilderBase &, Value *)> Func);
+
 /// Check whether BB is the merge point of a if-region.
 /// If so, return the branch instruction that determines which entry into
 /// BB will be taken.  Also, return by references the block that will be
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -656,6 +656,7 @@
                             : UseAfterReturn),
         SSGI(SSGI) {
     C = &(M.getContext());
+    DL = &M.getDataLayout();
     LongSize = M.getDataLayout().getPointerSizeInBits();
     IntptrTy = Type::getIntNTy(*C, LongSize);
     Int8PtrTy = Type::getInt8PtrTy(*C);
@@ -734,6 +735,7 @@
   };
 
   LLVMContext *C;
+  const DataLayout *DL;
   Triple TargetTriple;
   int LongSize;
   bool CompileKernel;
@@ -1319,8 +1321,9 @@
                              XCHG->getCompareOperand()->getType(),
                              std::nullopt);
   } else if (auto CI = dyn_cast<CallInst>(I)) {
-    if (CI->getIntrinsicID() == Intrinsic::masked_load ||
-        CI->getIntrinsicID() == Intrinsic::masked_store) {
+    switch (CI->getIntrinsicID()) {
+    case Intrinsic::masked_load:
+    case Intrinsic::masked_store: {
       bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_store;
       // Masked store has an initial operand for the value.
       unsigned OpOffset = IsWrite ? 1 : 0;
@@ -1337,7 +1340,24 @@
         Alignment = Op->getMaybeAlignValue();
       Value *Mask = CI->getOperand(2 + OpOffset);
       Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
-    } else {
+      break;
+    }
+    case Intrinsic::vp_load:
+    case Intrinsic::vp_store: {
+      auto *VPI = cast<VPIntrinsic>(CI);
+      unsigned IID = CI->getIntrinsicID();
+      bool IsWrite = IID == Intrinsic::vp_store;
+      if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+        return;
+      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(*DL);
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+                               VPI->getMaskParam(),
+                               VPI->getVectorLengthParam());
+      break;
+    }
+    default:
       for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
         if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
             ignoreAccess(I, CI->getArgOperand(ArgNo)))
@@ -1434,17 +1454,35 @@
 
 static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
                                         const DataLayout &DL, Type *IntptrTy,
-                                        Value *Mask, Instruction *I,
+                                        Value *Mask, Value *EVL, Instruction *I,
                                         Value *Addr, MaybeAlign Alignment,
                                         unsigned Granularity, Type *OpType,
                                         bool IsWrite, Value *SizeArgument,
                                         bool UseCalls, uint32_t Exp) {
   auto *VTy = cast<VectorType>(OpType);
-
   TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   auto Zero = ConstantInt::get(IntptrTy, 0);
 
-  SplitBlockAndInsertForEachLane(VTy->getElementCount(), IntptrTy, I,
+  IRBuilder IB(I);
+  Instruction *LoopInsertBefore = I;
+  if (EVL) {
+    // SplitBlockAndInsertForEachLane assumes its length argument is greater
+    // than zero, so only emit the per-lane checks when EVL is non-zero.
+    Type *EVLType = EVL->getType();
+    Value *IsEVLNonZero = IB.CreateICmpNE(EVL, ConstantInt::get(EVLType, 0));
+    LoopInsertBefore = SplitBlockAndInsertIfThen(IsEVLNonZero, I, false);
+    IB.SetInsertPoint(LoopInsertBefore);
+    // Cast EVL to IntptrTy.
+    EVL = IB.CreateZExtOrTrunc(EVL, IntptrTy);
+    // To avoid undefined behavior for extracting with out of range index, use
+    // the minimum of evl and element count as trip count.
+    Value *EC = IB.CreateElementCount(IntptrTy, VTy->getElementCount());
+    EVL = IB.CreateBinaryIntrinsic(Intrinsic::umin, EVL, EC);
+  } else {
+    EVL = IB.CreateElementCount(IntptrTy, VTy->getElementCount());
+  }
+
+  SplitBlockAndInsertForEachLane(EVL, LoopInsertBefore,
                                  [&](IRBuilderBase &IRB, Value *Index) {
     Value *MaskElem = IRB.CreateExtractElement(Mask, Index);
     if (auto *MaskElemC = dyn_cast<ConstantInt>(MaskElem)) {
@@ -1454,14 +1492,15 @@
       // Unconditional check
     } else {
       // Conditional check
-      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, &*IRB.GetInsertPoint(), false);
+      Instruction *ThenTerm = SplitBlockAndInsertIfThen(
+          MaskElem, &*IRB.GetInsertPoint(), false);
       IRB.SetInsertPoint(ThenTerm);
     }
 
     Value *InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
-    doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress, Alignment,
-                        Granularity, ElemTypeSize, IsWrite, SizeArgument,
-                        UseCalls, Exp);
+    doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(),
+                        InstrumentedAddress, Alignment, Granularity,
+                        ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
   });
 }
 
@@ -1510,9 +1549,9 @@
   unsigned Granularity = 1 << Mapping.Scale;
 
   if (O.MaybeMask) {
-    instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(),
-                                Addr, O.Alignment, Granularity, O.OpType,
-                                O.IsWrite, nullptr, UseCalls, Exp);
+    instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL,
+                                O.getInsn(), Addr, O.Alignment, Granularity,
+                                O.OpType, O.IsWrite, nullptr, UseCalls, Exp);
   } else {
     doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment,
                         Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls,
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1651,6 +1651,27 @@
   }
 }
 
+void llvm::SplitBlockAndInsertForEachLane(
+    Value *EVL, Instruction *InsertBefore,
+    std::function<void(IRBuilderBase &, Value *)> Func) {
+
+  IRBuilder<> IRB(InsertBefore);
+  Type *Ty = EVL->getType();
+
+  if (!isa<ConstantInt>(EVL)) {
+    auto [BodyIP, Index] = SplitBlockAndInsertSimpleForLoop(EVL, InsertBefore);
+    IRB.SetInsertPoint(BodyIP);
+    Func(IRB, Index);
+    return;
+  }
+
+  uint64_t Num = cast<ConstantInt>(EVL)->getZExtValue();
+  for (uint64_t Idx = 0; Idx < Num; ++Idx) {
+    IRB.SetInsertPoint(InsertBefore);
+    Func(IRB, ConstantInt::get(Ty, Idx));
+  }
+}
+
 BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
                                  BasicBlock *&IfFalse) {
   PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S \
+; RUN:     | FileCheck %s
+; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \
+; RUN:     | FileCheck %s -check-prefix=DISABLED
+
+; Check ASan instrumentation of llvm.vp.{load,store} with variable and constant masks
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;;;;;;;;;;;;;;;; STORE
+declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32) argmemonly nounwind
+
+define void @store.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.variable(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP8]])
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4f32.variable(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> %mask, i32 %evl)
+  ret void
+}
+
+;; Store using two vp.stores, which should instrument them both.
+define void @store.v4f32.1010.split(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.1010.split(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP8]])
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
+; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
+; CHECK-NEXT:    br label [[DOTSPLIT1:%.*]]
+; CHECK:       .split1:
+; CHECK-NEXT:    [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i64 [[IV2]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
+; CHECK:       16:
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
+; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
+; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP18]])
+; CHECK-NEXT:    br label [[TMP19]]
+; CHECK:       19:
+; CHECK-NEXT:    [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
+; CHECK-NEXT:    [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
+; CHECK-NEXT:    br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
+; CHECK:       .split1.split:
+; CHECK-NEXT:    br label [[TMP20]]
+; CHECK:       20:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4f32.1010.split(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 %evl)
+  ret void
+}
+
+;; Store using a vp.store after a full store. Shouldn't instrument the second one.
+define void @store.v4f32.0010.after.full.store(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.0010.after.full.store(
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT:    call void @__asan_store16(i64 [[TMP1]])
+; CHECK-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P]], align 16
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4f32.0010.after.full.store(
+; DISABLED-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  store <4 x float> %arg, ptr %p
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
+  ret void
+}
+
+;;;;;;;;;;;;;;;; LOAD
+declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32) argmemonly nounwind
+declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32) argmemonly nounwind
+
+define <4 x float> @load.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.variable(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP8]])
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+; DISABLED-LABEL: @load.v4f32.variable(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <4 x float> [[RES]]
+;
+  %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> %mask, i32 %evl)
+  ret <4 x float> %res
+}
+
+;; Load using two vp.loads, which should instrument them both.
+define <4 x float> @load.v4f32.1001.split(ptr align 4 %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001.split(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP8]])
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
+; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
+; CHECK-NEXT:    br label [[DOTSPLIT1:%.*]]
+; CHECK:       .split1:
+; CHECK-NEXT:    [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i64 [[IV2]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
+; CHECK:       16:
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
+; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
+; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP18]])
+; CHECK-NEXT:    br label [[TMP19]]
+; CHECK:       19:
+; CHECK-NEXT:    [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
+; CHECK-NEXT:    [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
+; CHECK-NEXT:    br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
+; CHECK:       .split1.split:
+; CHECK-NEXT:    br label [[TMP20]]
+; CHECK:       20:
+; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
+; CHECK-NEXT:    ret <4 x float> [[RES2]]
+;
+; DISABLED-LABEL: @load.v4f32.1001.split(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
+; DISABLED-NEXT:    ret <4 x float> [[RES2]]
+;
+  %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
+  %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 %evl)
+  ret <4 x float> %res2
+}
+
+;; Load using a vp.load after a full load. Shouldn't instrument the second one.
+define <4 x float> @load.v4f32.1001.after.full.load(ptr align 4 %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001.after.full.load(
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT:    call void @__asan_load16(i64 [[TMP1]])
+; CHECK-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16
+; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[RES2]]
+;
+; DISABLED-LABEL: @load.v4f32.1001.after.full.load(
+; DISABLED-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <4 x float> [[RES2]]
+;
+  %res = load <4 x float>, ptr %p
+  %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i32 %evl)
+  ret <4 x float> %res2
+}
+
+;; Scalable vector tests
+;; ---------------------------
+declare <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
+declare void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float>, ptr, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x float> @scalable.load.nxv4f32(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.load.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP10]])
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+; DISABLED-LABEL: @scalable.load.nxv4f32(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+  %res = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
+  ret <vscale x 4 x float> %res
+}
+
+define void @scalable.store.nxv4f32(ptr align 4 %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.store.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP10]])
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @scalable.store.nxv4f32(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
+  ret void
+}