Index: llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1183,13 +1183,29 @@ /// Fill memory range with the given origin value. void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr, TypeSize TS, Align Alignment) { - unsigned Size = TS.getFixedValue(); const DataLayout &DL = F.getParent()->getDataLayout(); const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy); unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); assert(IntptrAlignment >= kMinOriginAlignment); assert(IntptrSize >= kOriginSize); + // Note: The loop based formation works for fixed length vectors too, + // however we prefer to unroll and specialize alignment below. + if (TS.isScalable()) { + Value *Size = IRB.CreateTypeSize(IRB.getInt32Ty(), TS); + Value *RoundUp = IRB.CreateAdd(Size, IRB.getInt32(kOriginSize - 1)); + Value *End = IRB.CreateUDiv(RoundUp, IRB.getInt32(kOriginSize)); + auto [InsertPt, Index] = + SplitBlockAndInsertSimpleForLoop(End, &*IRB.GetInsertPoint()); + IRB.SetInsertPoint(InsertPt); + + Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index); + IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment); + return; + } + + unsigned Size = TS.getFixedValue(); + unsigned Ofs = 0; Align CurrentAlignment = Alignment; if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) { @@ -1575,6 +1591,8 @@ if (ArrayType *Array = dyn_cast(V->getType())) return collapseArrayShadow(Array, V, IRB); if (isa(V->getType())) { + if (isa(V->getType())) + return convertShadowToScalar(IRB.CreateOrReduce(V), IRB); unsigned BitWidth = V->getType()->getPrimitiveSizeInBits().getFixedValue(); return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth)); Index: llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll =================================================================== --- llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll +++ llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll @@ -277,4 +277,448 @@ ret void } +define void @load.nxv1i32(ptr %p) sanitize_address { +; CHECK-LABEL: @load.nxv1i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv1i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 4 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv1i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 4 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @load.nxv1i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 32 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 4 +; CALLS-NEXT: ret void + load , ptr %p + ret void +} + +define void @load.nxv2i32(ptr %p) sanitize_address { +; CHECK-LABEL: @load.nxv2i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 8 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv2i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 8 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv2i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 8 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @load.nxv2i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 64 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 8 +; CALLS-NEXT: ret void + load , ptr %p + ret void +} + +define void @load.nxv4i32(ptr %p) sanitize_address { +; CHECK-LABEL: @load.nxv4i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 16 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv4i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 16 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv4i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 16 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @load.nxv4i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 128 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 16 +; CALLS-NEXT: ret void + load , ptr %p + ret void +} + +define void @load.nxv8i32(ptr %p) sanitize_address { +; CHECK-LABEL: @load.nxv8i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 32 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv8i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 32 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv8i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 32 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @load.nxv8i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 256 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 32 +; CALLS-NEXT: ret void + load , ptr %p + ret void +} + +define void @load.nxv16i32(ptr %p) sanitize_address { +; CHECK-LABEL: @load.nxv16i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 64 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv16i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 64 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv16i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 64 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @load.nxv16i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 512 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 64 +; CALLS-NEXT: ret void + load , ptr %p + ret void +} + + +define void @store.nxv1i32(ptr %p) sanitize_address { +; CHECK-LABEL: @store.nxv1i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 4 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv1i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP3]], align 4 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 4 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv1i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 4 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv1i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0:![0-9]+]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 4 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 4 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @store.nxv1i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 32 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: store zeroinitializer, ptr [[P]], align 4 +; CALLS-NEXT: ret void + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv2i32(ptr %p) sanitize_address { +; CHECK-LABEL: @store.nxv2i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 8 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv2i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP3]], align 8 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 8 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv2i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 8 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv2i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 8 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 8 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @store.nxv2i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 64 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: store zeroinitializer, ptr [[P]], align 8 +; CALLS-NEXT: ret void + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv4i32(ptr %p) sanitize_address { +; CHECK-LABEL: @store.nxv4i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 16 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv4i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP3]], align 16 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 16 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv4i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 16 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 16 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 16 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @store.nxv4i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 128 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: store zeroinitializer, ptr [[P]], align 16 +; CALLS-NEXT: ret void + store zeroinitializer, ptr %p + ret void +} +define void @store.nxv8i32(ptr %p) sanitize_address { +; CHECK-LABEL: @store.nxv8i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 32 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 32 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv8i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP3]], align 32 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 32 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv8i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 32 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 32 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 32 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @store.nxv8i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 256 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: store zeroinitializer, ptr [[P]], align 32 +; CALLS-NEXT: ret void + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv16i32(ptr %p) sanitize_address { +; CHECK-LABEL: @store.nxv16i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 64 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv16i32( +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP3]], align 64 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 64 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv16i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 64 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv16i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 64 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 64 +; ORIGINS-NEXT: ret void +; +; CALLS-LABEL: @store.nxv16i32( +; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 512 +; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 +; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]]) +; CALLS-NEXT: store zeroinitializer, ptr [[P]], align 64 +; CALLS-NEXT: ret void + store zeroinitializer, ptr %p + ret void +}