diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -31,12 +31,15 @@
   MaybeAlign Alignment;
   // The mask Value, if we're looking at a masked load/store.
   Value *MaybeMask;
+  // The EVL Value, if we're looking at a vp intrinsic.
+  Value *MaybeEVL;
 
   InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
                            class Type *OpType, MaybeAlign Alignment,
-                           Value *MaybeMask = nullptr)
+                           Value *MaybeMask = nullptr,
+                           Value *MaybeEVL = nullptr)
       : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
-        MaybeMask(MaybeMask) {
+        MaybeMask(MaybeMask), MaybeEVL(MaybeEVL) {
     const DataLayout &DL = I->getModule()->getDataLayout();
     TypeStoreSize = DL.getTypeStoreSizeInBits(OpType);
     PtrUse = &I->getOperandUse(OperandNo);
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -656,6 +656,7 @@
           : UseAfterReturn),
         SSGI(SSGI) {
     C = &(M.getContext());
+    DL = &M.getDataLayout();
     LongSize = M.getDataLayout().getPointerSizeInBits();
     IntptrTy = Type::getIntNTy(*C, LongSize);
     Int8PtrTy = Type::getInt8PtrTy(*C);
@@ -743,6 +744,7 @@
   };
 
   LLVMContext *C;
+  const DataLayout *DL;
   Triple TargetTriple;
   int LongSize;
   bool CompileKernel;
@@ -1326,8 +1328,9 @@
                              XCHG->getCompareOperand()->getType(),
                              std::nullopt);
   } else if (auto CI = dyn_cast<CallInst>(I)) {
-    if (CI->getIntrinsicID() == Intrinsic::masked_load ||
-        CI->getIntrinsicID() == Intrinsic::masked_store) {
+    switch (CI->getIntrinsicID()) {
+    case Intrinsic::masked_load:
+    case Intrinsic::masked_store: {
       bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_store;
       // Masked store has an initial operand for the value.
       unsigned OpOffset = IsWrite ? 1 : 0;
@@ -1344,7 +1347,24 @@
         Alignment = Op->getMaybeAlignValue();
       Value *Mask = CI->getOperand(2 + OpOffset);
       Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
-    } else {
+      break;
+    }
+    case Intrinsic::vp_load:
+    case Intrinsic::vp_store: {
+      auto *VPI = cast<VPIntrinsic>(CI);
+      unsigned IID = CI->getIntrinsicID();
+      bool IsWrite = IID == Intrinsic::vp_store;
+      if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+        return;
+      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(*DL);
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+                               VPI->getMaskParam(),
+                               VPI->getVectorLengthParam());
+      break;
+    }
+    default:
       for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
         if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
             ignoreAccess(I, CI->getArgOperand(ArgNo)))
@@ -1441,7 +1461,7 @@
 static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
                                         const DataLayout &DL, Type *IntptrTy,
-                                        Value *Mask, Instruction *I,
+                                        Value *Mask, Value *EVL, Instruction *I,
                                         Value *Addr, MaybeAlign Alignment,
                                         unsigned Granularity, Type *OpType,
                                         bool IsWrite, Value *SizeArgument,
                                         bool UseCalls, uint32_t Exp) {
@@ -1451,6 +1471,10 @@
   auto *VTy = cast<VectorType>(OpType);
   TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   auto Zero = ConstantInt::get(IntptrTy, 0);
+  // Skip if EVL has a wider type than IntptrTy.
+  if (EVL && IntptrTy->getPrimitiveSizeInBits() < 32)
+    return;
+
   SplitBlockAndInsertForEachLane(VTy->getElementCount(), IntptrTy, I,
                                  [&](IRBuilderBase &IRB, Value *Index) {
     Value *MaskElem = IRB.CreateExtractElement(Mask, Index);
@@ -1458,10 +1482,24 @@
     if (auto *MaskElemC = dyn_cast<ConstantInt>(MaskElem)) {
       if (MaskElemC->isZero())
         // No check
        return;
-      // Unconditional check
+      // Only need to check the EVL.
+      if (EVL) {
+        Value *ExtEVL = IRB.CreateZExt(EVL, IntptrTy);
+        Value *InEVLRange = IRB.CreateICmpULT(Index, ExtEVL);
+        Instruction *ThenTerm = SplitBlockAndInsertIfThen(
+            InEVLRange, &*IRB.GetInsertPoint(), false);
+        IRB.SetInsertPoint(ThenTerm);
+      }
     } else {
-      // Conditional check
-      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, &*IRB.GetInsertPoint(), false);
+      // Check the EVL and the mask.
+      Value *IsActive = MaskElem;
+      if (EVL) {
+        Value *ExtEVL = IRB.CreateZExt(EVL, IntptrTy);
+        Value *InEVLRange = IRB.CreateICmpULT(Index, ExtEVL);
+        IsActive = IRB.CreateAnd(IsActive, InEVLRange);
+      }
+      Instruction *ThenTerm =
+          SplitBlockAndInsertIfThen(IsActive, &*IRB.GetInsertPoint(), false);
       IRB.SetInsertPoint(ThenTerm);
     }
@@ -1517,9 +1555,9 @@
   unsigned Granularity = 1 << Mapping.Scale;
   if (O.MaybeMask) {
-    instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(),
-                                Addr, O.Alignment, Granularity, O.OpType,
-                                O.IsWrite, nullptr, UseCalls, Exp);
+    instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL,
+                                O.getInsn(), Addr, O.Alignment, Granularity,
+                                O.OpType, O.IsWrite, nullptr, UseCalls, Exp);
   } else {
     doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment,
                         Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls,
                         Exp);
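Note on the emitted pattern: when the mask bit for a lane is known true, the shadow check is guarded only by the EVL bound; when the mask bit is unknown, the two conditions are and'ed (see the second hunk above). A condensed sketch of the guard emitted for lane 0 of a <4 x float> vp.store in calls mode (-asan-instrumentation-with-call-threshold=0); the value names here are illustrative, not taken from the pass output:

  %evl.ext = zext i32 %evl to i64
  %in.bounds = icmp ult i64 0, %evl.ext        ; lane 0 < EVL?
  br i1 %in.bounds, label %do.check, label %cont
  do.check:
  %elem = getelementptr <4 x float>, ptr %p, i64 0, i64 0
  %addr = ptrtoint ptr %elem to i64
  call void @__asan_storeN(i64 %addr, i64 4)   ; 4 = element store size in bytes
  br label %cont
  cont:

The tests below pin down exactly this shape.
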
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
@@ -0,0 +1,513 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S \
+; RUN:   | FileCheck %s
+; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \
+; RUN:   | FileCheck %s -check-prefix=DISABLED
+
+; Support ASan instrumentation for constant-mask llvm.vp.{load,store}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;;;;;;;;;;;;;;;; STORE
+declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32) argmemonly nounwind
+declare void @llvm.vp.store.v8i32.p0(<8 x i32>, ptr, <8 x i1>, i32) argmemonly nounwind
+declare void @llvm.vp.store.v4p0.p0(<4 x ptr>, ptr, <4 x i1>, i32) argmemonly nounwind
+
+define void @store.v4f32.1110(ptr %p, <4 x float> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.1110(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 0, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP5]], i64 4)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i64 1, [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP11]], i64 4)
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 2, [[TMP13]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP18:%.*]]
+; CHECK:       15:
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP17]], i64 4)
+; CHECK-NEXT:    br label [[TMP18]]
+; CHECK:       18:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 true, i1 true, i1 true, i1 false>, i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4f32.1110(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 true, i1 true, i1 true, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, i32 %evl)
+  ret void
+}
+
+define void @store.v8i32.10010110(ptr %p, <8 x i32> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v8i32.10010110(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 0, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <8 x i32>, ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP5]], i64 4)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i64 3, [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP11]], i64 4)
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 5, [[TMP13]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP18:%.*]]
+; CHECK:       15:
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 5
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP17]], i64 4)
+; CHECK-NEXT:    br label [[TMP18]]
+; CHECK:       18:
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp ult i64 6, [[TMP19]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP21:%.*]], label [[TMP24:%.*]]
+; CHECK:       21:
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 6
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP23]], i64 4)
+; CHECK-NEXT:    br label [[TMP24]]
+; CHECK:       24:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v8i32.p0(<8 x i32> [[ARG:%.*]], ptr [[P]], <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>, i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v8i32.10010110(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v8i32.p0(<8 x i32> [[ARG:%.*]], ptr [[P:%.*]], <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %arg, ptr %p, <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0>, i32 %evl)
+  ret void
+}
+
+define void @store.v4i64.0001(ptr %p, <4 x ptr> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4i64.0001(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 3, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x ptr>, ptr [[P:%.*]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP5]], i64 8)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4p0.p0(<4 x ptr> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4i64.0001(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4p0.p0(<4 x ptr> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.v4p0.p0(<4 x ptr> %arg, ptr %p, <4 x i1> <i1 0, i1 0, i1 0, i1 1>, i32 %evl)
+  ret void
+}
+
+define void @store.v4f32.variable(ptr %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.variable(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 0, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP8:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP7]], i64 4)
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i64 1, [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = and i1 [[TMP9]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[TMP13:%.*]], label [[TMP16:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr [[TMP14]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP15]], i64 4)
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[MASK]], i64 2
+; CHECK-NEXT:    [[TMP18:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp ult i64 2, [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = and i1 [[TMP17]], [[TMP19]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP21:%.*]], label [[TMP24:%.*]]
+; CHECK:       21:
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP23]], i64 4)
+; CHECK-NEXT:    br label [[TMP24]]
+; CHECK:       24:
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[MASK]], i64 3
+; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp ult i64 3, [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = and i1 [[TMP25]], [[TMP27]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[TMP29:%.*]], label [[TMP32:%.*]]
+; CHECK:       29:
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP31]], i64 4)
+; CHECK-NEXT:    br label [[TMP32]]
+; CHECK:       32:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4f32.variable(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> %mask, i32 %evl)
+  ret void
+}
+
+;; Store using a vp.store with a single-lane constant mask, which should instrument just that lane.
+define void @store.v4f32.1010.split(ptr %p, <4 x float> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.1010.split(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 2, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP5]], i64 4)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4f32.1010.split(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 0, i1 0, i1 1, i1 0>, i32 %evl)
+  ret void
+}
+
+;; Store using a vp.store after a full store. Shouldn't instrument the second one.
+define void @store.v4f32.0010.after.full.store(ptr %p, <4 x float> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.0010.after.full.store(
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT:    call void @__asan_store16(i64 [[TMP1]])
+; CHECK-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P]], align 16
+; CHECK-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @store.v4f32.0010.after.full.store(
+; DISABLED-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16
+; DISABLED-NEXT:    tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  store <4 x float> %arg, ptr %p
+  tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 0, i1 0, i1 1, i1 0>, i32 %evl)
+  ret void
+}
+
+;;;;;;;;;;;;;;;; LOAD
+declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32) argmemonly nounwind
+declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32) argmemonly nounwind
+declare <4 x ptr> @llvm.vp.load.v4p0.p0(ptr, <4 x i1>, i32) argmemonly nounwind
+
+define <8 x i32> @load.v8i32.11100001(ptr %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v8i32.11100001(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 0, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <8 x i32>, ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP5]], i64 4)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i64 1, [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP11]], i64 4)
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 2, [[TMP13]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP18:%.*]]
+; CHECK:       15:
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP17]], i64 4)
+; CHECK-NEXT:    br label [[TMP18]]
+; CHECK:       18:
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp ult i64 7, [[TMP19]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP21:%.*]], label [[TMP24:%.*]]
+; CHECK:       21:
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 7
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP23]], i64 4)
+; CHECK-NEXT:    br label [[TMP24]]
+; CHECK:       24:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr [[P]], <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; CHECK-NEXT:    ret <8 x i32> [[RES]]
+;
+; DISABLED-LABEL: @load.v8i32.11100001(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr [[P:%.*]], <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <8 x i32> [[RES]]
+;
+  %res = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, i32 %evl)
+  ret <8 x i32> %res
+}
+
+define <4 x float> @load.v4f32.1001(ptr %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 0, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP5]], i64 4)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i64 3, [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP11]], i64 4)
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+; DISABLED-LABEL: @load.v4f32.1001(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <4 x float> [[RES]]
+;
+  %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, i32 %evl)
+  ret <4 x float> %res
+}
+
+define <4 x ptr> @load.v4i64.0001(ptr %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4i64.0001(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 3, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x ptr>, ptr [[P:%.*]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP5]], i64 8)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x ptr> @llvm.vp.load.v4p0.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; CHECK-NEXT:    ret <4 x ptr> [[RES]]
+;
+; DISABLED-LABEL: @load.v4i64.0001(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x ptr> @llvm.vp.load.v4p0.p0(ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <4 x ptr> [[RES]]
+;
+  %res = tail call <4 x ptr> @llvm.vp.load.v4p0.p0(ptr %p, <4 x i1> <i1 0, i1 0, i1 0, i1 1>, i32 %evl)
+  ret <4 x ptr> %res
+}
+
+define <4 x float> @load.v4f32.variable(ptr %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.variable(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 0, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP8:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP7]], i64 4)
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i64 1, [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = and i1 [[TMP9]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[TMP13:%.*]], label [[TMP16:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr [[TMP14]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP15]], i64 4)
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[MASK]], i64 2
+; CHECK-NEXT:    [[TMP18:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp ult i64 2, [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = and i1 [[TMP17]], [[TMP19]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[TMP21:%.*]], label [[TMP24:%.*]]
+; CHECK:       21:
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP23]], i64 4)
+; CHECK-NEXT:    br label [[TMP24]]
+; CHECK:       24:
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[MASK]], i64 3
+; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp ult i64 3, [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = and i1 [[TMP25]], [[TMP27]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[TMP29:%.*]], label [[TMP32:%.*]]
+; CHECK:       29:
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP31]], i64 4)
+; CHECK-NEXT:    br label [[TMP32]]
+; CHECK:       32:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+; DISABLED-LABEL: @load.v4f32.variable(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <4 x float> [[RES]]
+;
+  %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> %mask, i32 %evl)
+  ret <4 x float> %res
+}
+
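With a runtime mask, the guard for each lane must combine the extracted mask bit with the EVL bound, as the variable-mask tests above verify. Condensed, the per-lane condition for lane 1 looks like this (a sketch with illustrative value names, not literal pass output):

  %bit = extractelement <4 x i1> %mask, i64 1   ; lane 1's mask bit
  %evl.ext = zext i32 %evl to i64
  %in.bounds = icmp ult i64 1, %evl.ext         ; lane 1 < EVL?
  %active = and i1 %bit, %in.bounds             ; check the lane only if both hold
  br i1 %active, label %do.check, label %cont
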
+;; Load using two vp.loads, which should instrument them both.
+define <4 x float> @load.v4f32.1001.split(ptr %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001.split(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 0, [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP5]], i64 4)
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i64 3, [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP11]], i64 4)
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; CHECK-NEXT:    ret <4 x float> [[RES2]]
+;
+; DISABLED-LABEL: @load.v4f32.1001.split(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; DISABLED-NEXT:    ret <4 x float> [[RES2]]
+;
+  %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 1, i1 0, i1 0, i1 0>, i32 %evl)
+  %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 0, i1 0, i1 0, i1 1>, i32 %evl)
+  ret <4 x float> %res2
+}
+
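The split test above and the after-full-load test below exercise ASan's same-block redundancy elimination: under the default optimization flags, a plain load or store records its pointer (TempsToInstrument in AddressSanitizer.cpp) so later masked/vp accesses to the same pointer in that basic block are skipped, while masked/vp accesses themselves never record the pointer, so a second vp access still gets its own checks. A hypothetical block illustrating both behaviors (%q and the masks are illustrative, not part of the test):

  %res = load <4 x float>, ptr %p            ; instrumented; %p is now recorded
  %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, i32 %evl)  ; skipped: %p already fully checked
  %res3 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %q, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, i32 %evl)  ; still instrumented: %q not recorded
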
+;; Load using a vp.load after a full load. Shouldn't instrument the second one.
+define <4 x float> @load.v4f32.1001.after.full.load(ptr %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001.after.full.load(
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT:    call void @__asan_load16(i64 [[TMP1]])
+; CHECK-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16
+; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[RES2]]
+;
+; DISABLED-LABEL: @load.v4f32.1001.after.full.load(
+; DISABLED-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <4 x float> [[RES2]]
+;
+  %res = load <4 x float>, ptr %p
+  %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, i32 %evl)
+  ret <4 x float> %res2
+}
+
+;; Scalable vector tests
+;; ---------------------------
+declare <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
+declare void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float>, ptr, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x float> @scalable.load.nxv4f32(ptr %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.load.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[IV]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = and i1 [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP10]]
+; CHECK:       7:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 4)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+; DISABLED-LABEL: @scalable.load.nxv4f32(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+  %res = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
+  ret <vscale x 4 x float> %res
+}
+
+define void @scalable.store.nxv4f32(ptr %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.store.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[EVL:%.*]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[IV]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = and i1 [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP10]]
+; CHECK:       7:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 4)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @scalable.store.nxv4f32(
+; DISABLED-NEXT:    tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
+  ret void
+}
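
For scalable vectors the lane count is not a compile-time constant, so SplitBlockAndInsertForEachLane emits a loop over vscale x 4 lanes instead of fully unrolled per-lane blocks. The skeleton matched by the checks above condenses to (a sketch with illustrative labels):

  %n = call i64 @llvm.vscale.i64()
  %lanes = mul i64 %n, 4                     ; total lanes of a <vscale x 4 x ...> vector
  br label %loop
  loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; per-lane mask+EVL guard and __asan_loadN/__asan_storeN call go here
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, %lanes
  br i1 %done, label %exit, label %loop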