diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1352,6 +1352,29 @@
       Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
       break;
     }
+    case Intrinsic::masked_expandload:
+    case Intrinsic::masked_compressstore: {
+      bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
+      unsigned OpOffset = IsWrite ? 1 : 0;
+      if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+        return;
+      auto BasePtr = CI->getOperand(OpOffset);
+      if (ignoreAccess(I, BasePtr))
+        return;
+      MaybeAlign Alignment = BasePtr->getPointerAlignment(*DL);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+
+      IRBuilder<> IB(I);
+      Value *Mask = CI->getOperand(1 + OpOffset);
+      // Use the popcount of Mask as the effective vector length.
+      Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
+      Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
+      Value *EVL = IB.CreateAddReduce(ExtMask);
+      Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
+      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
+                               EVL);
+      break;
+    }
     case Intrinsic::vp_load:
     case Intrinsic::vp_store:
     case Intrinsic::experimental_vp_strided_load:
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
--- a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
@@ -437,3 +437,82 @@
   tail call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %vp, i32 4, <vscale x 4 x i1> %mask)
   ret void
 }
+
+declare <vscale x 4 x float> @llvm.masked.expandload.nxv4f32(ptr, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.masked.compressstore.nxv4f32(<vscale x 4 x float>, ptr, <vscale x 4 x i1>)
+
+define <vscale x 4 x float> @scalable.expandload.nxv4f32(ptr align 4 %p, <vscale x 4 x i1> %mask) sanitize_address {
+; CHECK-LABEL: @scalable.expandload.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 4 x i1> [[MASK:%.*]] to <vscale x 4 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP6]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP11]])
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP7]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.masked.expandload.nxv4f32(ptr [[P]], <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> undef)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+; DISABLED-LABEL: @scalable.expandload.nxv4f32(
+; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.masked.expandload.nxv4f32(ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x float> undef)
+; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
+;
+  %res = tail call <vscale x 4 x float> @llvm.masked.expandload.nxv4f32(ptr %p, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
+  ret <vscale x 4 x float> %res
+}
+
+define void @scalable.compressstore.nxv4f32(ptr align 4 %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask) sanitize_address {
+; CHECK-LABEL: @scalable.compressstore.nxv4f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 4 x i1> [[MASK:%.*]] to <vscale x 4 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP6]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP11]])
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP7]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    tail call void @llvm.masked.compressstore.nxv4f32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    ret void
+;
+; DISABLED-LABEL: @scalable.compressstore.nxv4f32(
+; DISABLED-NEXT:    tail call void @llvm.masked.compressstore.nxv4f32(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]])
+; DISABLED-NEXT:    ret void
+;
+  tail call void @llvm.masked.compressstore.nxv4f32(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask)
+  ret void
+}
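
For context: expandload/compressstore touch only the first popcount(mask) contiguous elements, so the new case feeds the existing masked-access instrumentation an all-true mask plus an effective vector length (EVL) equal to the popcount of the original mask. Below is a minimal IR sketch of that EVL computation for a <vscale x 4 x i1> mask, matching the CHECK lines above; it is illustrative only and not part of the patch, and the value names are made up:

  ; popcount(mask): widen the i1 mask and horizontally add the lanes
  %ext = zext <vscale x 4 x i1> %mask to <vscale x 4 x i64>
  %evl = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %ext)
  ; the emitted check loop then clamps the EVL to the runtime vector length
  %vscale = call i64 @llvm.vscale.i64()
  %vlen = mul i64 %vscale, 4
  %bound = call i64 @llvm.umin.i64(i64 %evl, i64 %vlen)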