diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h @@ -31,12 +31,15 @@ MaybeAlign Alignment; // The mask Value, if we're looking at a masked load/store. Value *MaybeMask; + // The EVL Value, if we're looking at a vp intrinsic. + Value *MaybeEVL; InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite, class Type *OpType, MaybeAlign Alignment, - Value *MaybeMask = nullptr) + Value *MaybeMask = nullptr, + Value *MaybeEVL = nullptr) : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment), - MaybeMask(MaybeMask) { + MaybeMask(MaybeMask), MaybeEVL(MaybeEVL) { const DataLayout &DL = I->getModule()->getDataLayout(); TypeStoreSize = DL.getTypeStoreSizeInBits(OpType); PtrUse = &I->getOperandUse(OperandNo); diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -494,6 +494,12 @@ Instruction *InsertBefore, std::function Func); +// Similar to SplitBlockAndInsertForEachLane but gives action on lanes with +// indexes before \p End. +void SplitBlockAndInsertForLanes( + Value *End, Instruction *SplitBefore, + std::function Func); + /// Check whether BB is the merge point of a if-region. /// If so, return the branch instruction that determines which entry into /// BB will be taken. 
Also, return by references the block that will be diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -656,6 +656,7 @@ : UseAfterReturn), SSGI(SSGI) { C = &(M.getContext()); + DL = &M.getDataLayout(); LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); Int8PtrTy = Type::getInt8PtrTy(*C); @@ -743,6 +744,7 @@ }; LLVMContext *C; + const DataLayout *DL; Triple TargetTriple; int LongSize; bool CompileKernel; @@ -1326,8 +1328,9 @@ XCHG->getCompareOperand()->getType(), std::nullopt); } else if (auto CI = dyn_cast(I)) { - if (CI->getIntrinsicID() == Intrinsic::masked_load || - CI->getIntrinsicID() == Intrinsic::masked_store) { + switch (CI->getIntrinsicID()) { + case Intrinsic::masked_load: + case Intrinsic::masked_store: { bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_store; // Masked store has an initial operand for the value. unsigned OpOffset = IsWrite ? 1 : 0; @@ -1344,7 +1347,24 @@ Alignment = Op->getMaybeAlignValue(); Value *Mask = CI->getOperand(2 + OpOffset); Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask); - } else { + break; + } + case Intrinsic::vp_load: + case Intrinsic::vp_store: { + auto *VPI = cast(CI); + unsigned IID = CI->getIntrinsicID(); + bool IsWrite = IID == Intrinsic::vp_store; + if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads) + return; + unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID); + Type *Ty = IsWrite ? 
CI->getArgOperand(0)->getType() : CI->getType(); + MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(*DL); + Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment, + VPI->getMaskParam(), + VPI->getVectorLengthParam()); + break; + } + default: for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) { if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || ignoreAccess(I, CI->getArgOperand(ArgNo))) @@ -1441,7 +1461,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, - Value *Mask, Instruction *I, + Value *Mask, Value *EVL, Instruction *I, Value *Addr, MaybeAlign Alignment, unsigned Granularity, Type *OpType, bool IsWrite, Value *SizeArgument, @@ -1451,8 +1471,21 @@ TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); auto Zero = ConstantInt::get(IntptrTy, 0); - SplitBlockAndInsertForEachLane(VTy->getElementCount(), IntptrTy, I, - [&](IRBuilderBase &IRB, Value *Index) { + IRBuilder IB(I); + Value *End = IB.CreateElementCount(IntptrTy, VTy->getElementCount()); + if (EVL) { + // To avoid undefined behavior for extracting with out of range index, use + // the minimum of evl and element count as trip count. 
+ if (IntptrTy->getPrimitiveSizeInBits() < + EVL->getType()->getPrimitiveSizeInBits()) + return; + if (IntptrTy->getPrimitiveSizeInBits() > + EVL->getType()->getPrimitiveSizeInBits()) + EVL = IB.CreateZExt(EVL, IntptrTy); + End = IB.CreateBinaryIntrinsic(Intrinsic::umin, EVL, End); + } + + SplitBlockAndInsertForLanes(End, I, [&](IRBuilderBase &IRB, Value *Index) { Value *MaskElem = IRB.CreateExtractElement(Mask, Index); if (auto *MaskElemC = dyn_cast(MaskElem)) { if (MaskElemC->isZero()) @@ -1461,14 +1494,15 @@ // Unconditional check } else { // Conditional check - Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, &*IRB.GetInsertPoint(), false); + Instruction *ThenTerm = + SplitBlockAndInsertIfThen(MaskElem, &*IRB.GetInsertPoint(), false); IRB.SetInsertPoint(ThenTerm); } Value *InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index}); - doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress, Alignment, - Granularity, ElemTypeSize, IsWrite, SizeArgument, - UseCalls, Exp); + doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress, + Alignment, Granularity, ElemTypeSize, IsWrite, + SizeArgument, UseCalls, Exp); }); } @@ -1517,9 +1551,9 @@ unsigned Granularity = 1 << Mapping.Scale; if (O.MaybeMask) { - instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(), - Addr, O.Alignment, Granularity, O.OpType, - O.IsWrite, nullptr, UseCalls, Exp); + instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL, + O.getInsn(), Addr, O.Alignment, Granularity, + O.OpType, O.IsWrite, nullptr, UseCalls, Exp); } else { doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment, Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls, diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -1627,27 +1627,24 @@ return 
std::make_pair(LoopBody->getFirstNonPHI(), IV); } -void llvm::SplitBlockAndInsertForEachLane(ElementCount EC, - Type *IndexTy, Instruction *InsertBefore, - std::function Func) { +void llvm::SplitBlockAndInsertForLanes( + Value *End, Instruction *InsertBefore, + std::function Func) { IRBuilder<> IRB(InsertBefore); - if (EC.isScalable()) { - Value *NumElements = IRB.CreateElementCount(IndexTy, EC); - - auto [BodyIP, Index] = - SplitBlockAndInsertSimpleForLoop(NumElements, InsertBefore); + if (!isa(End)) { + auto [BodyIP, Index] = SplitBlockAndInsertSimpleForLoop(End, InsertBefore); IRB.SetInsertPoint(BodyIP); Func(IRB, Index); return; } - unsigned Num = EC.getFixedValue(); + unsigned Num = cast(End)->getZExtValue(); for (unsigned Idx = 0; Idx < Num; ++Idx) { IRB.SetInsertPoint(InsertBefore); - Func(IRB, ConstantInt::get(IndexTy, Idx)); + Func(IRB, ConstantInt::get(End->getType(), Idx)); } } diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll @@ -0,0 +1,285 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S \ +; RUN: | FileCheck %s +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \ +; RUN: | FileCheck %s -check-prefix=DISABLED + +; Support ASan instrumentation for constant-mask llvm.vp.{load,store} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +;;;;;;;;;;;;;;;; STORE +declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32) argmemonly nounwind + +define void @store.v4f32.variable(ptr %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address { +; CHECK-LABEL: @store.v4f32.variable( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64 +; CHECK-NEXT: 
[[TMP2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 4) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP6]], i64 4) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 7: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]]) +; CHECK-NEXT: ret void +; +; DISABLED-LABEL: @store.v4f32.variable( +; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; DISABLED-NEXT: ret void +; + tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> %mask, i32 %evl) + ret void +} + +;; Store using two vp.stores, which should instrument them both. 
+define void @store.v4f32.1010.split(ptr %p, <4 x float> %arg, i32 %evl) sanitize_address { +; CHECK-LABEL: @store.v4f32.1010.split( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 4) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> , i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP6]], i64 4) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 7: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> , i32 [[EVL]]) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP8]], i64 4) +; CHECK-NEXT: br label [[DOTSPLIT_SPLIT_SPLIT:%.*]] +; CHECK: .split.split.split: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, [[DOTSPLIT_SPLIT]] ], [ [[IV1_NEXT:%.*]], [[TMP14:%.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> , i64 [[IV1]] +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP14]] +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV1]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP13]], i64 4) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 14: +; CHECK-NEXT: [[IV1_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[IV1_CHECK:%.*]] = icmp eq i64 
[[IV1_NEXT]], [[TMP9]] +; CHECK-NEXT: br i1 [[IV1_CHECK]], label [[DOTSPLIT_SPLIT_SPLIT_SPLIT:%.*]], label [[DOTSPLIT_SPLIT_SPLIT]] +; CHECK: .split.split.split.split: +; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> , i32 [[EVL]]) +; CHECK-NEXT: ret void +; +; DISABLED-LABEL: @store.v4f32.1010.split( +; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> , i32 [[EVL:%.*]]) +; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> , i32 [[EVL]]) +; DISABLED-NEXT: ret void +; + tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> , i32 %evl) + tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> , i32 %evl) + ret void +} + +;; Store using a vp.store after a full store. Shouldn't instrument the second one. +define void @store.v4f32.0010.after.full.store(ptr %p, <4 x float> %arg, i32 %evl) sanitize_address { +; CHECK-LABEL: @store.v4f32.0010.after.full.store( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__asan_store16(i64 [[TMP1]]) +; CHECK-NEXT: store <4 x float> [[ARG:%.*]], ptr [[P]], align 16 +; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> , i32 [[EVL:%.*]]) +; CHECK-NEXT: ret void +; +; DISABLED-LABEL: @store.v4f32.0010.after.full.store( +; DISABLED-NEXT: store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16 +; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> , i32 [[EVL:%.*]]) +; DISABLED-NEXT: ret void +; + store <4 x float> %arg, ptr %p + tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> , i32 %evl) + ret void +} + +;;;;;;;;;;;;;;;; LOAD +declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32) argmemonly nounwind +declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32) argmemonly nounwind + +define <4 x float> 
@load.v4f32.variable(ptr %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address { +; CHECK-LABEL: @load.v4f32.variable( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 4) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP6]], i64 4) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 7: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]]) +; CHECK-NEXT: ret <4 x float> [[RES]] +; +; DISABLED-LABEL: @load.v4f32.variable( +; DISABLED-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; DISABLED-NEXT: ret <4 x float> [[RES]] +; + %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> %mask, i32 %evl) + ret <4 x float> %res +} + +;; Load using two vp.loads, which should instrument them both. 
+define <4 x float> @load.v4f32.1001.split(ptr %p, i32 %evl) sanitize_address { +; CHECK-LABEL: @load.v4f32.1001.split( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 4) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> , i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP6]], i64 4) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 7: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> , i32 [[EVL]]) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP8]], i64 4) +; CHECK-NEXT: br label [[DOTSPLIT_SPLIT_SPLIT:%.*]] +; CHECK: .split.split.split: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, [[DOTSPLIT_SPLIT]] ], [ [[IV1_NEXT:%.*]], [[TMP14:%.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> , i64 [[IV1]] +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP14]] +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV1]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP13]], i64 4) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 14: +; CHECK-NEXT: [[IV1_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[IV1_CHECK:%.*]] = icmp eq i64 [[IV1_NEXT]], [[TMP9]] +; 
CHECK-NEXT: br i1 [[IV1_CHECK]], label [[DOTSPLIT_SPLIT_SPLIT_SPLIT:%.*]], label [[DOTSPLIT_SPLIT_SPLIT]] +; CHECK: .split.split.split.split: +; CHECK-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> , i32 [[EVL]]) +; CHECK-NEXT: ret <4 x float> [[RES2]] +; +; DISABLED-LABEL: @load.v4f32.1001.split( +; DISABLED-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> , i32 [[EVL:%.*]]) +; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> , i32 [[EVL]]) +; DISABLED-NEXT: ret <4 x float> [[RES2]] +; + %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> , i32 %evl) + %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> , i32 %evl) + ret <4 x float> %res2 +} + +;; Load using a vp.load after a full load. Shouldn't instrument the second one. +define <4 x float> @load.v4f32.1001.after.full.load(ptr %p, i32 %evl) sanitize_address { +; CHECK-LABEL: @load.v4f32.1001.after.full.load( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__asan_load16(i64 [[TMP1]]) +; CHECK-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16 +; CHECK-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> , i32 [[EVL:%.*]]) +; CHECK-NEXT: ret <4 x float> [[RES2]] +; +; DISABLED-LABEL: @load.v4f32.1001.after.full.load( +; DISABLED-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 +; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> , i32 [[EVL:%.*]]) +; DISABLED-NEXT: ret <4 x float> [[RES2]] +; + %res = load <4 x float>, ptr %p + %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> , i32 %evl) + ret <4 x float> %res2 +} + +;; Scalable vector tests +;; --------------------------- +declare @llvm.vp.load.nxv4f32.p0(ptr, , i32) +declare void @llvm.vp.store.nxv4f32.p0(, ptr, , i32) + +define 
@scalable.load.nxv4f32(ptr %p, %mask, i32 %evl) sanitize_address { +; CHECK-LABEL: @scalable.load.nxv4f32( +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP2]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_loadN(i64 [[TMP8]], i64 4) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: [[RES:%.*]] = tail call @llvm.vp.load.nxv4f32.p0(ptr [[P]], [[MASK]], i32 [[EVL]]) +; CHECK-NEXT: ret [[RES]] +; +; DISABLED-LABEL: @scalable.load.nxv4f32( +; DISABLED-NEXT: [[RES:%.*]] = tail call @llvm.vp.load.nxv4f32.p0(ptr [[P:%.*]], [[MASK:%.*]], i32 [[EVL:%.*]]) +; DISABLED-NEXT: ret [[RES]] +; + %res = tail call @llvm.vp.load.nxv4f32.p0(ptr %p, %mask, i32 %evl) + ret %res +} + +define void @scalable.store.nxv4f32(ptr %p, %arg, %mask, i32 %evl) sanitize_address { +; CHECK-LABEL: @scalable.store.nxv4f32( +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP2]]) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[IV:%.*]] = phi 
i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement [[MASK:%.*]], i64 [[IV]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +; CHECK-NEXT: call void @__asan_storeN(i64 [[TMP8]], i64 4) +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]] +; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; CHECK: .split.split: +; CHECK-NEXT: tail call void @llvm.vp.store.nxv4f32.p0( [[ARG:%.*]], ptr [[P]], [[MASK]], i32 [[EVL]]) +; CHECK-NEXT: ret void +; +; DISABLED-LABEL: @scalable.store.nxv4f32( +; DISABLED-NEXT: tail call void @llvm.vp.store.nxv4f32.p0( [[ARG:%.*]], ptr [[P:%.*]], [[MASK:%.*]], i32 [[EVL:%.*]]) +; DISABLED-NEXT: ret void +; + tail call void @llvm.vp.store.nxv4f32.p0( %arg, ptr %p, %mask, i32 %evl) + ret void +}