diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -33,13 +33,16 @@
   Value *MaybeMask;
   // The EVL Value, if we're looking at a vp intrinsic.
   Value *MaybeEVL;
+  // The Stride Value, if we're looking at a strided load/store.
+  Value *MaybeStride;
 
   InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
                            class Type *OpType, MaybeAlign Alignment,
                            Value *MaybeMask = nullptr,
-                           Value *MaybeEVL = nullptr)
+                           Value *MaybeEVL = nullptr,
+                           Value *MaybeStride = nullptr)
       : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
-        MaybeMask(MaybeMask), MaybeEVL(MaybeEVL) {
+        MaybeMask(MaybeMask), MaybeEVL(MaybeEVL), MaybeStride(MaybeStride) {
     const DataLayout &DL = I->getModule()->getDataLayout();
     TypeStoreSize = DL.getTypeStoreSizeInBits(OpType);
     PtrUse = &I->getOperandUse(OperandNo);
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -695,6 +695,13 @@
                                         TypeSize TypeStoreSize, bool IsWrite,
                                         Value *SizeArgument, bool UseCalls,
                                         uint32_t Exp);
+  void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, const DataLayout &DL,
+                                   Type *IntptrTy, Value *Mask, Value *EVL,
+                                   Value *Stride, Instruction *I, Value *Addr,
+                                   MaybeAlign Alignment, unsigned Granularity,
+                                   Type *OpType, bool IsWrite,
+                                   Value *SizeArgument, bool UseCalls,
+                                   uint32_t Exp);
   Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
                            Value *ShadowValue, uint32_t TypeStoreSize);
   Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
@@ -1343,18 +1350,32 @@
       break;
     }
     case Intrinsic::vp_load:
-    case Intrinsic::vp_store: {
+    case Intrinsic::vp_store:
+    case Intrinsic::experimental_vp_strided_load:
+    case Intrinsic::experimental_vp_strided_store: {
      auto *VPI = cast<VPIntrinsic>(CI);
      unsigned IID = CI->getIntrinsicID();
-      bool IsWrite = IID == Intrinsic::vp_store;
+      bool IsWrite = CI->getType()->isVoidTy();
      if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
        return;
      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(*DL);
+      Value *Stride = nullptr;
+      if (IID == Intrinsic::experimental_vp_strided_store ||
+          IID == Intrinsic::experimental_vp_strided_load) {
+        Stride = VPI->getOperand(PtrOpNo + 1);
+        // Use the pointer alignment as the element alignment if the stride is a
+        // multiple of the pointer alignment. Otherwise, the element alignment
+        // should be Align(1).
+        unsigned PointerAlign = Alignment.valueOrOne().value();
+        if (!isa<ConstantInt>(Stride) ||
+            cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
+          Alignment = Align(1);
+      }
       Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
-                               VPI->getMaskParam(),
-                               VPI->getVectorLengthParam());
+                               VPI->getMaskParam(), VPI->getVectorLengthParam(),
+                               Stride);
       break;
     }
     default:
@@ -1452,13 +1473,11 @@
                                          IsWrite, nullptr, UseCalls, Exp);
 }
 
-static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
-                                        const DataLayout &DL, Type *IntptrTy,
-                                        Value *Mask, Value *EVL, Instruction *I,
-                                        Value *Addr, MaybeAlign Alignment,
-                                        unsigned Granularity, Type *OpType,
-                                        bool IsWrite, Value *SizeArgument,
-                                        bool UseCalls, uint32_t Exp) {
+void AddressSanitizer::instrumentMaskedLoadOrStore(
+    AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask,
+    Value *EVL, Value *Stride, Instruction *I, Value *Addr,
+    MaybeAlign Alignment, unsigned Granularity, Type *OpType, bool IsWrite,
+    Value *SizeArgument, bool UseCalls, uint32_t Exp) {
   auto *VTy = cast<VectorType>(OpType);
   TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   auto Zero = ConstantInt::get(IntptrTy, 0);
@@ -1482,6 +1501,10 @@
     EVL = IB.CreateElementCount(IntptrTy, VTy->getElementCount());
   }
 
+  // Cast Stride to IntptrTy.
+  if (Stride)
+    Stride = IB.CreateZExtOrTrunc(Stride, IntptrTy);
+
   SplitBlockAndInsertForEachLane(EVL, LoopInsertBefore,
                                  [&](IRBuilderBase &IRB, Value *Index) {
     Value *MaskElem = IRB.CreateExtractElement(Mask, Index);
@@ -1497,7 +1520,14 @@
       IRB.SetInsertPoint(ThenTerm);
     }
 
-    Value *InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
+    Value *InstrumentedAddress;
+    if (Stride) {
+      Index = IRB.CreateMul(Index, Stride);
+      Addr = IRB.CreateBitCast(Addr, Type::getInt8PtrTy(*C));
+      InstrumentedAddress = IRB.CreateGEP(Type::getInt8Ty(*C), Addr, {Index});
+    } else {
+      InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
+    }
     doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress,
                         Alignment, Granularity, ElemTypeSize, IsWrite,
                         SizeArgument, UseCalls, Exp);
@@ -1550,8 +1580,9 @@
   unsigned Granularity = 1 << Mapping.Scale;
   if (O.MaybeMask) {
     instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL,
-                                O.getInsn(), Addr, O.Alignment, Granularity,
-                                O.OpType, O.IsWrite, nullptr, UseCalls, Exp);
+                                O.MaybeStride, O.getInsn(), Addr, O.Alignment,
+                                Granularity, O.OpType, O.IsWrite, nullptr,
+                                UseCalls, Exp);
   } else {
     doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment,
                         Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls,
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
--- a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
@@ -323,3 +323,124 @@
   tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
   ret void
 }
+
+; Test vp strided loads and stores.
+declare <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr, i32, <vscale x 4 x i1>, i32)
+declare void @llvm.experimental.vp.strided.store.nxv4f32.i32(<vscale x 4 x float>, ptr, i32, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x float> @scalable.strided.load.nxv4f32(ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.strided.load.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP12]], i64 4)
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+; DISABLED-LABEL: @scalable.strided.load.nxv4f32(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+  %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
+  ret <vscale x 4 x float> %res
+}
+
+define void @scalable.strided.store.nxv4f32(<vscale x 4 x float> %arg, ptr align 4 %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.strided.store.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP14:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP13:%.*]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP13]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[IV]], [[TMP7]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP11]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP12]], i64 4)
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], i32 [[STRIDE]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @scalable.strided.store.nxv4f32(
+; DISABLED-NEXT:    tail call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 [[STRIDE:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.experimental.vp.strided.store.nxv4f32.i32(<vscale x 4 x float> %arg, ptr %p, i32 %stride, <vscale x 4 x i1> %mask, i32 %evl)
+  ret void
+}
+
+; Test the stride is a multiple of the pointer alignment.
+define <vscale x 4 x float> @scalable.strided.load.nxv4f32.align(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.strided.load.nxv4f32.align(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP12]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP11]])
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+; DISABLED-LABEL: @scalable.strided.load.nxv4f32.align(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.p0.i32(ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+  %res = tail call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i32(ptr %p, i32 4, <vscale x 4 x i1> %mask, i32 %evl)
+  ret <vscale x 4 x float> %res
+}
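
For reference, a minimal standalone sketch of the element-alignment heuristic the
patch applies to strided accesses. The helper name and its plain-integer
parameters are illustrative only; the patch itself works on Value*/MaybeAlign and
only inspects the stride when it is a ConstantInt.

  #include <cassert>
  #include <cstdint>

  // Hypothetical helper mirroring the heuristic above: keep the pointer
  // alignment for each strided element only when the stride is a known
  // constant that is a multiple of that alignment (so every element offset
  // Index * Stride bytes preserves it); otherwise assume byte alignment.
  unsigned stridedElementAlign(unsigned PointerAlign, bool StrideIsConstant,
                               uint64_t Stride) {
    assert(PointerAlign != 0 && "pointer alignment must be at least 1");
    if (!StrideIsConstant || Stride % PointerAlign != 0)
      return 1;
    return PointerAlign;
  }

With an align-4 pointer and a constant stride of 4, this keeps the __asan_load4
fast path seen in @scalable.strided.load.nxv4f32.align, while a runtime or
non-multiple stride falls back to the __asan_loadN/__asan_storeN path checked in
the other two tests.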