diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9028,7 +9028,10 @@
   BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
   Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
-  Value *Load = Builder.CreateCall(F, {Predicate, BasePtr});
+  auto *Load =
+      cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
+  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
+  CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
 
   return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
                       : Builder.CreateSExt(Load, VectorTy);
@@ -9056,7 +9059,11 @@
   BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
   Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
-  return Builder.CreateCall(F, {Val, Predicate, BasePtr});
+  auto *Store =
+      cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
+  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
+  CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
+  return Store;
 }
 
 // Limit the usage of scalable llvm IR generated by the ACLE by using the
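For context, a minimal sketch of the kind of ACLE source this codegen path handles (a hypothetical example, not part of the patch): with the change above, the `@llvm.masked.load` emitted for `svld1_s32` carries a `!tbaa` tag derived from the pointee type of the base pointer, which is what the updated tests below check.

```c
#include <arm_sve.h>

// Hypothetical illustration: this lowers to a call to
// @llvm.masked.load.nxv4i32.p0nxv4i32, which with this patch carries the
// TBAA tag for 'int' taken from the pointee type of 'base'.
svint32_t load_ints(svbool_t pg, const int32_t *base) {
  return svld1_s32(pg, base);
}
```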
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
@@ -17,7 +17,7 @@
 // CHECK-LABEL: @test_svld1_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR:[0-9]+]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z13test_svld1_s8u10__SVBool_tPKa(
@@ -35,7 +35,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT:[0-9]+]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_s16u10__SVBool_tPKs(
@@ -54,7 +54,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT:[0-9]+]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_s32u10__SVBool_tPKi(
@@ -73,7 +73,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG:[0-9]+]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_s64u10__SVBool_tPKl(
@@ -91,7 +91,7 @@
 // CHECK-LABEL: @test_svld1_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z13test_svld1_u8u10__SVBool_tPKh(
@@ -109,7 +109,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_u16u10__SVBool_tPKt(
@@ -128,7 +128,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_u32u10__SVBool_tPKj(
@@ -147,7 +147,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_u64u10__SVBool_tPKm(
@@ -166,7 +166,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer), !tbaa ![[HALF:[0-9]+]]
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_f16u10__SVBool_tPKDh(
@@ -185,7 +185,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer), !tbaa ![[FLOAT:[0-9]+]]
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_f32u10__SVBool_tPKf(
@@ -204,7 +204,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer), !tbaa ![[DOUBLE:[0-9]+]]
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld1_f64u10__SVBool_tPKd(
@@ -224,7 +224,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svld1_vnum_s8u10__SVBool_tPKal(
@@ -246,7 +246,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s16u10__SVBool_tPKsl(
@@ -269,7 +269,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s32u10__SVBool_tPKil(
@@ -292,7 +292,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s64u10__SVBool_tPKll(
@@ -314,7 +314,7 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer), !tbaa ![[CHAR]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svld1_vnum_u8u10__SVBool_tPKhl(
@@ -336,7 +336,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer), !tbaa ![[SHORT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u16u10__SVBool_tPKtl(
@@ -359,7 +359,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer), !tbaa ![[INT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u32u10__SVBool_tPKjl(
@@ -382,7 +382,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer), !tbaa ![[LONG]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u64u10__SVBool_tPKml(
@@ -405,7 +405,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer), !tbaa ![[HALF]]
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f16u10__SVBool_tPKDhl(
@@ -428,7 +428,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer), !tbaa ![[FLOAT]]
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f32u10__SVBool_tPKfl(
@@ -451,7 +451,7 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer), !tbaa ![[DOUBLE]]
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f64u10__SVBool_tPKdl(
@@ -1151,3 +1151,18 @@
 svfloat64_t test_svld1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index) {
   return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_f64, )(pg, bases, index);
 }
+
+// CHECK-DAG: ![[CHAR_TY:[0-9]+]] = !{!"omnipotent char", !8, i64 0}
+// CHECK-DAG: ![[CHAR]] = !{![[CHAR_TY]], ![[CHAR_TY]], i64 0}
+// CHECK-DAG: ![[SHORT_TY:[0-9]+]] = !{!"short", !{{.*}}, i64 0}
+// CHECK-DAG: ![[SHORT]] = !{![[SHORT_TY]], ![[SHORT_TY]], i64 0}
+// CHECK-DAG: ![[INT_TY:[0-9]+]] = !{!"int", !7, i64 0}
+// CHECK-DAG: ![[INT]] = !{![[INT_TY]], ![[INT_TY]], i64 0}
+// CHECK-DAG: ![[LONG_TY:[0-9]+]] = !{!"long", !7, i64 0}
+// CHECK-DAG: ![[LONG]] = !{![[LONG_TY]], ![[LONG_TY]], i64 0}
+// CHECK-DAG: ![[HALF_TY:[0-9]+]] = !{!"__fp16", !7, i64 0}
+// CHECK-DAG: ![[HALF]] = !{![[HALF_TY]], ![[HALF_TY]], i64 0}
+// CHECK-DAG: ![[FLOAT_TY:[0-9]+]] = !{!"float", !7, i64 0}
+// CHECK-DAG: ![[FLOAT]] = !{![[FLOAT_TY]], ![[FLOAT_TY]], i64 0}
+// CHECK-DAG: ![[DOUBLE_TY:[0-9]+]] = !{!"double", !7, i64 0}
+// CHECK-DAG: ![[DOUBLE]] = !{![[DOUBLE_TY]], ![[DOUBLE_TY]], i64 0}
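The LLVM-side changes below are the other half of the story: the SVE ld1/st1 combines in AArch64TargetTransformInfo.cpp fold `llvm.aarch64.sve.ld1`/`st1` with an all-active predicate into plain loads and stores (and otherwise into `llvm.masked.load`/`llvm.masked.store`), and without the new `copyMetadata(II)` calls that fold would silently drop the `!tbaa` the frontend just attached. The test exercises this with generic `!annotation` metadata, which works because `copyMetadata` copies all instruction metadata, `!tbaa` included. A hedged sketch (hypothetical function name, not from the patch) of source that hits the all-active fold:

```c
#include <arm_sve.h>

// Hypothetical example: with an all-true predicate, InstCombine rewrites
// the sve.ld1 intrinsic into an ordinary vector load; copyMetadata makes
// that load inherit the call's !tbaa instead of losing it.
svint32_t load_all_active(const int32_t *base) {
  return svld1_s32(svptrue_b32(), base);
}
```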
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -862,12 +862,14 @@
 
   if (isAllActivePredicate(Pred)) {
     LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
+    Load->copyMetadata(II);
     return IC.replaceInstUsesWith(II, Load);
   }
 
   CallInst *MaskedLoad =
       Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
                                Pred, ConstantAggregateZero::get(VecTy));
+  MaskedLoad->copyMetadata(II);
   return IC.replaceInstUsesWith(II, MaskedLoad);
 }
 
@@ -883,12 +885,14 @@
       Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());
 
   if (isAllActivePredicate(Pred)) {
-    Builder.CreateStore(VecOp, VecPtr);
+    StoreInst *Store = Builder.CreateStore(VecOp, VecPtr);
+    Store->copyMetadata(II);
     return IC.eraseInstFromFunction(II);
   }
 
-  Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL),
-                            Pred);
+  CallInst *MaskedStore = Builder.CreateMaskedStore(
+      VecOp, VecPtr, PtrOp->getPointerAlignment(DL), Pred);
+  MaskedStore->copyMetadata(II);
   return IC.eraseInstFromFunction(II);
 }
 
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -6,24 +6,24 @@
 
 define <vscale x 4 x i32> @combine_ld1(i32* %ptr) #0 {
 ; CHECK-LABEL: @combine_ld1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
   ret <vscale x 4 x i32> %2
 }
 
 define <vscale x 4 x i32> @combine_ld1_casted_predicate(i32* %ptr) #0 {
 ; CHECK-LABEL: @combine_ld1_casted_predicate(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 ;
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
   %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
-  %4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, i32* %ptr)
+  %4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, i32* %ptr), !annotation !0
   ret <vscale x 4 x i32> %4
 }
 
@@ -31,11 +31,11 @@
 ; CHECK-LABEL: @combine_ld1_masked(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
   ret <vscale x 4 x i32> %2
 }
 
@@ -45,37 +45,37 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
-; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
   %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
-  %4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, i16* %ptr)
+  %4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, i16* %ptr), !annotation !0
   ret <vscale x 8 x i16> %4
 }
 
 define void @combine_st1(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
 ; CHECK-LABEL: @combine_st1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
   ret void
 }
 
 define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
 ; CHECK-LABEL: @combine_st1_casted_predicate(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
   %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
-  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, i32* %ptr)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, i32* %ptr), !annotation !0
   ret void
 }
 
@@ -83,11 +83,11 @@
 ; CHECK-LABEL: @combine_st1_masked(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
   ret void
 }
 
@@ -97,13 +97,13 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[VEC:%.*]], <vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]])
+; CHECK-NEXT:    call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[VEC:%.*]], <vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
   %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
-  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, i16* %ptr)
+  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, i16* %ptr), !annotation !0
   ret void
 }
 
@@ -124,3 +124,5 @@
 declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
 
 attributes #0 = { "target-features"="+sve" }
+
+!0 = !{!"some metadata"}
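Taken together, the two halves let TBAA survive from ACLE source through InstCombine. A hedged illustration of what that can buy (hypothetical code, not from the patch): because the store below is tagged as a `float` access, alias analysis may conclude it cannot modify `*n`, so the loop bound can stay in a register instead of being reloaded on every iteration.

```c
#include <arm_sve.h>

// Hypothetical code: the svst1_f32 store carries float TBAA, so it is
// presumed not to alias the int load of '*n'; the bound can be hoisted.
void scale(float *data, const int *n, float k) {
  for (int i = 0; i < *n; i += (int)svcntw()) {
    svbool_t pg = svwhilelt_b32(i, *n);
    svfloat32_t v = svld1_f32(pg, data + i);
    svst1_f32(pg, data + i, svmul_n_f32_x(pg, v, k));
  }
}
```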