diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -512,14 +512,14 @@ } // Load N-element structure into N vectors (scalar base) -defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2">; -defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3">; -defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4">; +defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">; +defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">; +defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">; // Load N-element structure into N vectors (scalar base, VL displacement) -defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2">; -defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3">; -defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4">; +defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">; +defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">; +defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">; // Load one octoword and replicate (scalar base) let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8841,13 +8841,13 @@ unsigned N; switch (IntID) { - case Intrinsic::aarch64_sve_ld2: + case Intrinsic::aarch64_sve_ld2_sret: N = 2; break; - case Intrinsic::aarch64_sve_ld3: + case Intrinsic::aarch64_sve_ld3_sret: N = 3; break; - case Intrinsic::aarch64_sve_ld4: + case Intrinsic::aarch64_sve_ld4_sret: N = 4; break; default: @@ -8861,9 +8861,16 @@ Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0); BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset); BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); - - Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()}); - return Builder.CreateCall(F, { Predicate, BasePtr }); + Function *F = CGM.getIntrinsic(IntID, {VTy}); + Value *Call = Builder.CreateCall(F, {Predicate, BasePtr}); + unsigned MinElts = VTy->getElementCount().getKnownMinValue(); + Value *Ret = llvm::PoisonValue::get(RetTy); + for (unsigned I = 0; I < N; I++) { + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + Value *SRet = Builder.CreateExtractValue(Call, I); + Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx); + } + return Ret; } Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c @@ -18,14 +18,22 @@ // CHECK-LABEL: @test_svld2_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z15test_svld2_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] // svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base) { @@ -38,16 +46,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z20test_svld2_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c @@ -17,13 +17,21 @@ // CHECK-LABEL: @test_svld2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z13test_svld2_s8u10__SVBool_tPKa( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] // svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base) { @@ -33,14 +41,22 @@ // CHECK-LABEL: @test_svld2_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] // svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base) { @@ -50,14 +66,22 @@ // CHECK-LABEL: @test_svld2_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] // svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base) { @@ -67,14 +91,22 @@ // CHECK-LABEL: @test_svld2_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] // svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base) { @@ -83,13 +115,21 @@ // CHECK-LABEL: @test_svld2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z13test_svld2_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] // svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base) { @@ -99,14 +139,22 @@ // CHECK-LABEL: @test_svld2_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] // svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base) { @@ -116,14 +164,22 @@ // CHECK-LABEL: @test_svld2_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] // svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base) { @@ -133,14 +189,22 @@ // CHECK-LABEL: @test_svld2_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] // svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base) { @@ -150,14 +214,22 @@ // CHECK-LABEL: @test_svld2_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] // svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base) { @@ -167,14 +239,22 @@ // CHECK-LABEL: @test_svld2_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] // svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base) { @@ -184,14 +264,22 @@ // CHECK-LABEL: @test_svld2_f64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] // svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) { @@ -202,15 +290,23 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z18test_svld2_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] // svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) { @@ -222,16 +318,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) { @@ -243,16 +347,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) { @@ -264,16 +376,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) { @@ -284,15 +404,23 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z18test_svld2_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] // svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) { @@ -304,16 +432,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) { @@ -325,16 +461,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) { @@ -346,16 +490,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) { @@ -367,16 +519,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) { @@ -388,16 +548,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) { @@ -409,16 +577,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c @@ -18,14 +18,26 @@ // CHECK-LABEL: @test_svld3_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z15test_svld3_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base) { @@ -37,16 +49,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z20test_svld3_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c @@ -17,13 +17,25 @@ // CHECK-LABEL: @test_svld3_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z13test_svld3_s8u10__SVBool_tPKa( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP6]] // svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base) { @@ -33,14 +45,26 @@ // CHECK-LABEL: @test_svld3_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base) { @@ -50,14 +74,26 @@ // CHECK-LABEL: @test_svld3_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base) { @@ -67,14 +103,26 @@ // CHECK-LABEL: @test_svld3_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base) { @@ -83,13 +131,25 @@ // CHECK-LABEL: @test_svld3_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z13test_svld3_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP6]] // svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base) { @@ -99,14 +159,26 @@ // CHECK-LABEL: @test_svld3_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base) { @@ -116,14 +188,26 @@ // CHECK-LABEL: @test_svld3_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base) { @@ -133,14 +217,26 @@ // CHECK-LABEL: @test_svld3_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base) { @@ -150,14 +246,26 @@ // CHECK-LABEL: @test_svld3_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base) { @@ -167,14 +275,26 @@ // CHECK-LABEL: @test_svld3_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base) { @@ -184,14 +304,26 @@ // CHECK-LABEL: @test_svld3_f64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) { @@ -202,15 +334,27 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z18test_svld3_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP8]] // svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) { @@ -222,16 +366,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) { @@ -243,16 +399,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) { @@ -264,16 +432,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) { @@ -284,15 +464,27 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z18test_svld3_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP8]] // svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) { @@ -304,16 +496,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) { @@ -325,16 +529,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) { @@ -346,16 +562,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) { @@ -367,16 +595,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) { @@ -388,16 +628,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) { @@ -409,16 +661,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c @@ -18,14 +18,30 @@ // CHECK-LABEL: @test_svld4_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z15test_svld4_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] // svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base) { @@ -37,16 +53,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP9]], [[TMP10]], i64 24) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z20test_svld4_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP9]], [[TMP10]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP11]] // svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c @@ -17,13 +17,29 @@ // CHECK-LABEL: @test_svld4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z13test_svld4_s8u10__SVBool_tPKa( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] // svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base) { @@ -33,14 +49,30 @@ // CHECK-LABEL: @test_svld4_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base) { @@ -50,14 +82,30 @@ // CHECK-LABEL: @test_svld4_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base) { @@ -67,14 +115,30 @@ // CHECK-LABEL: @test_svld4_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base) { @@ -83,13 +147,29 @@ // CHECK-LABEL: @test_svld4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z13test_svld4_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] // svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base) { @@ -99,14 +179,30 @@ // CHECK-LABEL: @test_svld4_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base) { @@ -116,14 +212,30 @@ // CHECK-LABEL: @test_svld4_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base) { @@ -133,14 +245,30 @@ // CHECK-LABEL: @test_svld4_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base) { @@ -150,14 +278,30 @@ // CHECK-LABEL: @test_svld4_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base) { @@ -167,14 +311,30 @@ // CHECK-LABEL: @test_svld4_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base) { @@ -184,14 +344,30 @@ // CHECK-LABEL: @test_svld4_f64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) { @@ -202,15 +378,31 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-NEXT: ret [[TMP10]] // // CPP-CHECK-LABEL: @_Z18test_svld4_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP10]] // svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) { @@ -222,16 +414,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP11]] // svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) { @@ -243,16 +451,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP11]] // svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) { @@ -264,16 +488,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP11]] // svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) { @@ -284,15 +524,31 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-NEXT: ret [[TMP10]] // // CPP-CHECK-LABEL: @_Z18test_svld4_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP10]] // svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) { @@ -304,16 +560,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP11]] // svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) { @@ -325,16 +597,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP11]] // svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) { @@ -346,16 +634,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP11]] // svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) { @@ -367,16 +671,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP11]] // svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) { @@ -388,16 +708,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP11]] // svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) { @@ -409,16 +745,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP9]], [[TMP10]], i64 6) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP9]], [[TMP10]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP11]] // svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -946,10 +946,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". - class AdvSIMD_ManyVec_PredLoad_Intrinsic - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMPointerToElt<0>], - [IntrReadMem, IntrArgMemOnly]>; - class AdvSIMD_1Vec_PredLoad_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -1529,10 +1525,6 @@ def int_aarch64_sve_ld1 : AdvSIMD_1Vec_PredLoad_Intrinsic; -def int_aarch64_sve_ld2 : AdvSIMD_ManyVec_PredLoad_Intrinsic; -def int_aarch64_sve_ld3 : AdvSIMD_ManyVec_PredLoad_Intrinsic; -def int_aarch64_sve_ld4 : AdvSIMD_ManyVec_PredLoad_Intrinsic; - def int_aarch64_sve_ld2_sret : AdvSIMD_2Vec_PredLoad_Intrinsic; def int_aarch64_sve_ld3_sret : AdvSIMD_3Vec_PredLoad_Intrinsic; def int_aarch64_sve_ld4_sret : AdvSIMD_4Vec_PredLoad_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1029,8 +1029,6 @@ SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const; - SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef LoadOps, - EVT VT, SelectionDAG &DAG, const SDLoc &DL) const; SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13403,61 +13403,6 @@ return true; } -// Lower an SVE structured load intrinsic returning a tuple type to target -// specific intrinsic taking the same input but returning a multi-result value -// of the split tuple type. -// -// E.g. Lowering an LD3: -// -// call @llvm.aarch64.sve.ld3.nxv12i32( -// %pred, -// * %addr) -// -// Output DAG: -// -// t0: ch = EntryToken -// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0 -// t4: i64,ch = CopyFromReg t0, Register:i64 %1 -// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4 -// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2 -// -// This is called pre-legalization to avoid widening/splitting issues with -// non-power-of-2 tuple types used for LD3, such as nxv12i32. -SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic, - ArrayRef LoadOps, - EVT VT, SelectionDAG &DAG, - const SDLoc &DL) const { - assert(VT.isScalableVector() && "Can only lower scalable vectors"); - - unsigned N, Opcode; - static const std::pair> - IntrinsicMap[] = { - {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}}, - {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}}, - {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; - - std::tie(N, Opcode) = llvm::find_if(IntrinsicMap, [&](auto P) { - return P.first == Intrinsic; - })->second; - assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 && - "invalid tuple vector type!"); - - EVT SplitVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorElementCount().divideCoefficientBy(N)); - assert(isTypeLegal(SplitVT)); - - SmallVector VTs(N, SplitVT); - VTs.push_back(MVT::Other); // Chain - SDVTList NodeTys = DAG.getVTList(VTs); - - SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps); - SmallVector PseudoLoadOps; - for (unsigned I = 0; I < N; ++I) - PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I)); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps); -} - EVT AArch64TargetLowering::getOptimalMemOpType( const MemOp &Op, const AttributeList &FuncAttributes) const { bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat); @@ -19852,20 +19797,6 @@ /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_st1_scatter_scalar_offset: return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED); - case Intrinsic::aarch64_sve_ld2: - case Intrinsic::aarch64_sve_ld3: - case Intrinsic::aarch64_sve_ld4: { - SDLoc DL(N); - SDValue Chain = N->getOperand(0); - SDValue Mask = N->getOperand(2); - SDValue BasePtr = N->getOperand(3); - SDValue LoadOps[] = {Chain, Mask, BasePtr}; - unsigned IntrinsicID = - cast(N->getOperand(1))->getZExtValue(); - SDValue Result = - LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL); - return DAG.getMergeValues({Result, Chain}, DL); - } case Intrinsic::aarch64_rndr: case Intrinsic::aarch64_rndrrs: { unsigned IntrinsicID = diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -9,30 +9,87 @@ define float @foo1(double* %x0, double* %x1, double* %x2) nounwind { ; CHECK-LABEL: foo1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #-48]! // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ld4d { z8.d, z9.d, z10.d, z11.d }, p0/z, [x0] +; CHECK-NEXT: ld4d { z17.d, z18.d, z19.d, z20.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z16.d }, p0/z, [x2] +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: mov z4.d, z8.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #2 +; CHECK-NEXT: mov z4.d, z9.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov z4.d, z10.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #6 +; CHECK-NEXT: mov z4.d, z11.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: mov z4.d, z17.d +; CHECK-NEXT: mov z21.d, z0.d +; CHECK-NEXT: mov z22.d, z1.d +; CHECK-NEXT: mov z23.d, z2.d +; CHECK-NEXT: mov z8.d, z3.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #2 +; CHECK-NEXT: mov z4.d, z18.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov z4.d, z19.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #6 +; CHECK-NEXT: mov z4.d, z20.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov z6.d, z0.d ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] -; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] -; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2] ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: mov z7.d, z1.d +; CHECK-NEXT: mov z17.d, z2.d +; CHECK-NEXT: mov z18.d, z3.d +; CHECK-NEXT: mov z1.d, z21.d +; CHECK-NEXT: mov z2.d, z22.d +; CHECK-NEXT: mov z3.d, z23.d +; CHECK-NEXT: mov z4.d, z8.d +; CHECK-NEXT: mov z5.d, z16.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z16.d }, p0, [sp] -; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1d { z6.d }, p0, [sp] +; CHECK-NEXT: st1d { z7.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z17.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: bl callee1 ; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp], #48 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: %0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %1 = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %0) - %2 = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( %1, double* %x0) - %3 = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( %1, double* %x1) + %2 = call {, , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %1, double* %x0) + %3 = call {, , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %1, double* %x1) %4 = call @llvm.aarch64.sve.ld1.nxv2f64( %1, double* %x2) - %call = call float @callee1(float 1.000000e+00, %2, %3, %4) + %5 = extractvalue { , , , } %2, 0 + %6 = extractvalue { , , , } %2, 1 + %7 = extractvalue { , , , } %2, 2 + %8 = extractvalue { , , , } %2, 3 + %9 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( poison, %5, i64 0) + %10 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %9, %6, i64 2) + %11 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %10, %7, i64 4) + %12 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %11, %8, i64 6) + %13 = extractvalue { , , , } %3, 0 + %14 = extractvalue { , , , } %3, 1 + %15 = extractvalue { , , , } %3, 2 + %16 = extractvalue { , , , } %3, 3 + %17 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( poison, %13, i64 0) + %18 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %17, %14, i64 2) + %19 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %18, %15, i64 4) + %20 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %19, %16, i64 6) + %call = call float @callee1(float 1.000000e+00, %12, %20, %4) ret float %call } @@ -43,27 +100,63 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x0] +; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x1] +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: mov z4.d, z16.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #2 +; CHECK-NEXT: mov z4.d, z17.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov z4.d, z18.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #6 +; CHECK-NEXT: mov z4.d, z19.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: mov z4.d, z16.d +; CHECK-NEXT: mov z20.d, z0.d +; CHECK-NEXT: mov z21.d, z1.d +; CHECK-NEXT: mov z22.d, z2.d +; CHECK-NEXT: mov z23.d, z3.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #2 +; CHECK-NEXT: mov z4.d, z17.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov z4.d, z18.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #6 +; CHECK-NEXT: mov z4.d, z19.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 ; CHECK-NEXT: add x9, sp, #16 -; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] -; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] +; CHECK-NEXT: mov z5.d, z0.d ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: st1d { z5.d }, p0, [x9] +; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 -; CHECK-NEXT: st1d { z16.d }, p0, [x9] +; CHECK-NEXT: st1d { z6.d }, p0, [x9, #1, mul vl] ; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: mov w3, #3 ; CHECK-NEXT: mov w4, #4 ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 -; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl] -; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl] +; CHECK-NEXT: st1d { z7.d }, p0, [x9, #2, mul vl] ; CHECK-NEXT: add x9, sp, #16 -; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl] +; CHECK-NEXT: mov z16.d, z3.d +; CHECK-NEXT: mov z1.d, z20.d +; CHECK-NEXT: mov z2.d, z21.d +; CHECK-NEXT: mov z3.d, z22.d +; CHECK-NEXT: mov z4.d, z23.d +; CHECK-NEXT: st1d { z16.d }, p0, [x9, #3, mul vl] ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 ; CHECK-NEXT: addvl sp, sp, #4 @@ -73,39 +166,104 @@ entry: %0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %1 = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %0) - %2 = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( %1, double* %x0) - %3 = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( %1, double* %x1) - %call = call float @callee2(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, float 1.000000e+00, %2, %3) + %2 = call {, , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %1, double* %x0) + %3 = call {, , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %1, double* %x1) + %4 = extractvalue { , , , } %2, 0 + %5 = extractvalue { , , , } %2, 1 + %6 = extractvalue { , , , } %2, 2 + %7 = extractvalue { , , , } %2, 3 + %8 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( poison, %4, i64 0) + %9 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %8, %5, i64 2) + %10 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %9, %6, i64 4) + %11 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %10, %7, i64 6) + %12 = extractvalue { , , , } %2, 0 + %13 = extractvalue { , , , } %2, 1 + %14 = extractvalue { , , , } %2, 2 + %15 = extractvalue { , , , } %2, 3 + %16 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( poison, %12, i64 0) + %17 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %16, %13, i64 2) + %18 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %17, %14, i64 4) + %19 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %18, %15, i64 6) + %call = call float @callee2(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, float 1.000000e+00, %11, %19) ret float %call } define float @foo3(double* %x0, double* %x1, double* %x2) nounwind { ; CHECK-LABEL: foo3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ptrue p4.b +; CHECK-NEXT: mov x19, x2 +; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p4/z, [x0] +; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p4/z, [x1] +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: mov z4.d, z16.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #2 +; CHECK-NEXT: mov z4.d, z17.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov z4.d, z18.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov w0, #6 +; CHECK-NEXT: mov z4.d, z19.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv8f64.nx2f64 +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: mov z23.d, z3.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z20.d, z0.d +; CHECK-NEXT: mov z21.d, z1.d +; CHECK-NEXT: mov z22.d, z2.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv6f64.nx2f64 +; CHECK-NEXT: mov w0, #2 +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv6f64.nx2f64 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: bl llvm.aarch64.vector.insert.nxv6f64.nx2f64 +; CHECK-NEXT: ld1d { z6.d }, p4/z, [x19] +; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: mov z16.d, z1.d ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0] -; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1] -; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2] ; CHECK-NEXT: fmov s1, #2.00000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: mov z17.d, z2.d +; CHECK-NEXT: mov z2.d, z20.d +; CHECK-NEXT: mov z3.d, z21.d +; CHECK-NEXT: mov z4.d, z22.d +; CHECK-NEXT: mov z5.d, z23.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z16.d }, p0, [sp] -; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z7.d }, p0, [sp] +; CHECK-NEXT: st1d { z16.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z17.d }, p0, [sp, #2, mul vl] ; CHECK-NEXT: bl callee3 ; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %1 = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %0) - %2 = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( %1, double* %x0) - %3 = call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( %1, double* %x1) - %4 = call @llvm.aarch64.sve.ld1.nxv2f64( %1, double* %x2) - %call = call float @callee3(float 1.000000e+00, float 2.000000e+00, %2, %3, %4) + %2 = call {, , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %1, double* %x0) + %3 = call {, , } @llvm.aarch64.sve.ld3.sret.nxv2f64( %1, double* %x1) + %4 = extractvalue { , , , } %2, 0 + %5 = extractvalue { , , , } %2, 1 + %6 = extractvalue { , , , } %2, 2 + %7 = extractvalue { , , , } %2, 3 + %8 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( poison, %4, i64 0) + %9 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %8, %5, i64 2) + %10 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %9, %6, i64 4) + %11 = call @llvm.aarch64.vector.insert.nxv8f64.nx2f64( %10, %7, i64 6) + %12 = extractvalue { , , } %3, 0 + %13 = extractvalue { , , } %3, 1 + %14 = extractvalue { , , } %3, 2 + %15 = call @llvm.aarch64.vector.insert.nxv6f64.nx2f64( poison, %4, i64 0) + %16 = call @llvm.aarch64.vector.insert.nxv6f64.nx2f64( %15 , %4, i64 2) + %17 = call @llvm.aarch64.vector.insert.nxv6f64.nx2f64( %16 , %4, i64 4) + %18 = call @llvm.aarch64.sve.ld1.nxv2f64( %1, double* %x2) + %call = call float @callee3(float 1.000000e+00, float 2.000000e+00, %11, %17, %18) ret float %call } @@ -381,9 +539,9 @@ declare @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg) declare @llvm.aarch64.sve.convert.from.svbool.nxv2i1() -declare @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1(, double*) -declare @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1(, double*) +declare {, , , } @llvm.aarch64.sve.ld4.sret.nxv2f64(, double*) +declare {, , } @llvm.aarch64.sve.ld3.sret.nxv2f64(, double*) declare @llvm.aarch64.sve.ld1.nxv2f64(, double*) declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) -declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(, i32 immarg) +declare @llvm.aarch64.vector.insert.nxv8f64.nx2f64(, , i64) +declare @llvm.aarch64.vector.insert.nxv6f64.nx2f64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+imm-addr-mode.ll +++ /dev/null @@ -1,539 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s - -; NOTE: invalid, upper and lower bound immediate values of the regimm -; addressing mode are checked only for the byte version of each -; instruction (`ldb`), as the code for detecting the immediate is -; common to all instructions, and varies only for the number of -; elements of the structure store, which is = 2, 3, 4. - -; ld2b -define @ld2.nxv32i8( %Pg, *%addr) { -; CHECK-LABEL: ld2.nxv32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 2 -%base_ptr = bitcast * %base to i8* -%res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld2.nxv32i8_lower_bound( %Pg, *%addr) { -; CHECK-LABEL: ld2.nxv32i8_lower_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, #-16, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -16 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld2.nxv32i8_upper_bound( %Pg, *%addr) { -; CHECK-LABEL: ld2.nxv32i8_upper_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, #14, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 14 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld2.nxv32i8_not_multiple_of_2( %Pg, *%addr) { -; CHECK-LABEL: ld2.nxv32i8_not_multiple_of_2: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #3 -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 3 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld2.nxv32i8_outside_lower_bound( %Pg, *%addr) { -; CHECK-LABEL: ld2.nxv32i8_outside_lower_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #-18 -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -18 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld2.nxv32i8_outside_upper_bound( %Pg, *%addr) { -; CHECK-LABEL: ld2.nxv32i8_outside_upper_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #16 -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 16 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -; ld2h -define @ld2.nxv16i16( %Pg, * %addr) { -; CHECK-LABEL: ld2.nxv16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, #14, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 14 -%base_ptr = bitcast * %base to i16 * -%res = call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16( %Pg, i16 *%base_ptr) -ret %res -} - -define @ld2.nxv16f16( %Pg, * %addr) { -; CHECK-LABEL: ld2.nxv16f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, #-16, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -16 -%base_ptr = bitcast * %base to half * -%res = call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16( %Pg, half *%base_ptr) -ret %res -} - -define @ld2.nxv16bf16( %Pg, * %addr) #0 { -; CHECK-LABEL: ld2.nxv16bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, #12, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 12 -%base_ptr = bitcast * %base to bfloat * -%res = call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16( %Pg, bfloat *%base_ptr) -ret %res -} - -; ld2w -define @ld2.nxv8i32( %Pg, * %addr) { -; CHECK-LABEL: ld2.nxv8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, #14, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 14 -%base_ptr = bitcast * %base to i32 * -%res = call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32( %Pg, i32 *%base_ptr) -ret %res -} - -define @ld2.nxv8f32( %Pg, * %addr) { -; CHECK-LABEL: ld2.nxv8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, #-16, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -16 -%base_ptr = bitcast * %base to float * -%res = call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32( %Pg, float *%base_ptr) -ret %res -} - -; ld2d -define @ld2.nxv4i64( %Pg, * %addr) { -; CHECK-LABEL: ld2.nxv4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, #14, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 14 -%base_ptr = bitcast * %base to i64 * -%res = call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64( %Pg, i64 *%base_ptr) -ret %res -} - -define @ld2.nxv4f64( %Pg, * %addr) { -; CHECK-LABEL: ld2.nxv4f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, #-16, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -16 -%base_ptr = bitcast * %base to double * -%res = call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64( %Pg, double *%base_ptr) -ret %res -} - -; ld3b -define @ld3.nxv48i8( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv48i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #3, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 3 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld3.nxv48i8_lower_bound( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv48i8_lower_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #-24, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -24 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld3.nxv48i8_upper_bound( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv48i8_upper_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, #21, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 21 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld3.nxv48i8_not_multiple_of_3_01( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_01: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #4 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 4 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld3.nxv48i8_not_multiple_of_3_02( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_02: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 5 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld3.nxv48i8_outside_lower_bound( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv48i8_outside_lower_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #-27 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -27 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld3.nxv48i8_outside_upper_bound( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv48i8_outside_upper_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #24 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 24 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -; ld3h -define @ld3.nxv24i16( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv24i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #21, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 21 -%base_ptr = bitcast * %base to i16 * -%res = call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16( %Pg, i16 *%base_ptr) -ret %res -} - -define @ld3.nxv24f16( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv24f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #21, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 21 -%base_ptr = bitcast * %base to half * -%res = call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16( %Pg, half *%base_ptr) -ret %res -} - -define @ld3.nxv24bf16( %Pg, *%addr) #0 { -; CHECK-LABEL: ld3.nxv24bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, #-24, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -24 -%base_ptr = bitcast * %base to bfloat * -%res = call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16( %Pg, bfloat *%base_ptr) -ret %res -} - -; ld3w -define @ld3.nxv12i32( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv12i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, #21, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 21 -%base_ptr = bitcast * %base to i32 * -%res = call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32( %Pg, i32 *%base_ptr) -ret %res -} - -define @ld3.nxv12f32( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv12f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, #-24, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -24 -%base_ptr = bitcast * %base to float * -%res = call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32( %Pg, float *%base_ptr) -ret %res -} - -; ld3d -define @ld3.nxv6i64( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv6i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, #21, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 21 -%base_ptr = bitcast * %base to i64 * -%res = call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64( %Pg, i64 *%base_ptr) -ret %res -} - -define @ld3.nxv6f64( %Pg, *%addr) { -; CHECK-LABEL: ld3.nxv6f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, #-24, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -24 -%base_ptr = bitcast * %base to double * -%res = call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64( %Pg, double *%base_ptr) -ret %res -} - -; ; ld4b -define @ld4.nxv64i8( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #4, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 4 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld4.nxv64i8_lower_bound( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8_lower_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #-32, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -32 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld4.nxv64i8_upper_bound( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8_upper_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, #28, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 28 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld4.nxv64i8_not_multiple_of_4_01( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_01: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 5 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld4.nxv64i8_not_multiple_of_4_02( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_02: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #6 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 6 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld4.nxv64i8_not_multiple_of_4_03( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_03: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #7 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 7 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld4.nxv64i8_outside_lower_bound( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8_outside_lower_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov x9, #-576 -; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: mul x8, x8, x9 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9) -; xM = -9 * 2^6 -; xP = RDVL * 2^-4 -; xOFFSET = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36 -%base = getelementptr , * %addr, i64 -36 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -define @ld4.nxv64i8_outside_upper_bound( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv64i8_outside_upper_bound: -; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov w9, #512 -; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: mul x8, x8, x9 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] -; CHECK-NEXT: ret -; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #16) #2) -; xM = 2^9 -; xP = RDVL * 2^-4 -; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32 -%base = getelementptr , * %addr, i64 32 -%base_ptr = bitcast * %base to i8 * -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) -ret %res -} - -; ld4h -define @ld4.nxv32i16( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #8, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 8 -%base_ptr = bitcast * %base to i16 * -%res = call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16( %Pg, i16 *%base_ptr) -ret %res -} - -define @ld4.nxv32f16( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv32f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #28, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 28 -%base_ptr = bitcast * %base to half * -%res = call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16( %Pg, half *%base_ptr) -ret %res -} - -define @ld4.nxv32bf16( %Pg, *%addr) #0 { -; CHECK-LABEL: ld4.nxv32bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, #-32, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -32 -%base_ptr = bitcast * %base to bfloat * -%res = call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16( %Pg, bfloat *%base_ptr) -ret %res -} - -; ld4w -define @ld4.nxv16i32( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, #28, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 28 -%base_ptr = bitcast * %base to i32 * -%res = call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32( %Pg, i32 *%base_ptr) -ret %res -} - -define @ld4.nxv16f32( %Pg, * %addr) { -; CHECK-LABEL: ld4.nxv16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, #-32, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -32 -%base_ptr = bitcast * %base to float * -%res = call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32( %Pg, float *%base_ptr) -ret %res -} - -; ld4d -define @ld4.nxv8i64( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, #28, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 28 -%base_ptr = bitcast * %base to i64 * -%res = call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64( %Pg, i64 *%base_ptr) -ret %res -} - -define @ld4.nxv8f64( %Pg, *%addr) { -; CHECK-LABEL: ld4.nxv8f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, #-32, mul vl] -; CHECK-NEXT: ret -%base = getelementptr , * %addr, i64 -32 -%base_ptr = bitcast * %base to double * -%res = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64( %Pg, double * %base_ptr) -ret %res -} - -declare @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(, double*) - -declare @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(, double*) - -declare @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(, double*) - -; +bf16 is required for the bfloat version. -attributes #0 = { "target-features"="+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-reg+reg-addr-mode.ll +++ /dev/null @@ -1,285 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s - -; ld2b -define @ld2.nxv32i8( %Pg, i8 *%addr, i64 %a) { -; CHECK-LABEL: ld2.nxv32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x1] -; CHECK-NEXT: ret -%addr2 = getelementptr i8, i8 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%addr2) -ret %res -} - -; ld2h -define @ld2.nxv16i16( %Pg, i16 *%addr, i64 %a) { -; CHECK-LABEL: ld2.nxv16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr i16, i16 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16( %Pg, i16 *%addr2) -ret %res -} - -define @ld2.nxv16f16( %Pg, half *%addr, i64 %a) { -; CHECK-LABEL: ld2.nxv16f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr half, half * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16( %Pg, half *%addr2) -ret %res -} - -define @ld2.nxv16bf16( %Pg, bfloat *%addr, i64 %a) #0 { -; CHECK-LABEL: ld2.nxv16bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr bfloat, bfloat * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16( %Pg, bfloat *%addr2) -ret %res -} - -; ld2w -define @ld2.nxv8i32( %Pg, i32 *%addr, i64 %a) { -; CHECK-LABEL: ld2.nxv8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2] -; CHECK-NEXT: ret -%addr2 = getelementptr i32, i32 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32( %Pg, i32 *%addr2) -ret %res -} - -define @ld2.nxv8f32( %Pg, float *%addr, i64 %a) { -; CHECK-LABEL: ld2.nxv8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2] -; CHECK-NEXT: ret -%addr2 = getelementptr float, float * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32( %Pg, float *%addr2) -ret %res -} - -; ld2d -define @ld2.nxv4i64( %Pg, i64 *%addr, i64 %a) { -; CHECK-LABEL: ld2.nxv4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3] -; CHECK-NEXT: ret -%addr2 = getelementptr i64, i64 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64( %Pg, i64 *%addr2) -ret %res -} - -define @ld2.nxv4f64( %Pg, double *%addr, i64 %a) { -; CHECK-LABEL: ld2.nxv4f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3] -; CHECK-NEXT: ret -%addr2 = getelementptr double, double * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64( %Pg, double *%addr2) -ret %res -} - -; ld3b -define @ld3.nxv48i8( %Pg, i8 *%addr, i64 %a) { -; CHECK-LABEL: ld3.nxv48i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x1] -; CHECK-NEXT: ret -%addr2 = getelementptr i8, i8 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%addr2) -ret %res -} - -; ld3h -define @ld3.nxv24i16( %Pg, i16 *%addr, i64 %a) { -; CHECK-LABEL: ld3.nxv24i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr i16, i16 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16( %Pg, i16 *%addr2) -ret %res -} - -define @ld3.nxv24f16( %Pg, half *%addr, i64 %a) { -; CHECK-LABEL: ld3.nxv24f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr half, half * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16( %Pg, half *%addr2) -ret %res -} - -define @ld3.nxv24bf16( %Pg, bfloat *%addr, i64 %a) #0 { -; CHECK-LABEL: ld3.nxv24bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr bfloat, bfloat * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16( %Pg, bfloat *%addr2) -ret %res -} - -; ld3w -define @ld3.nxv12i32( %Pg, i32 *%addr, i64 %a) { -; CHECK-LABEL: ld3.nxv12i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2] -; CHECK-NEXT: ret -%addr2 = getelementptr i32, i32 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32( %Pg, i32 *%addr2) -ret %res -} - -define @ld3.nxv12f32( %Pg, float *%addr, i64 %a) { -; CHECK-LABEL: ld3.nxv12f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2] -; CHECK-NEXT: ret -%addr2 = getelementptr float, float * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32( %Pg, float *%addr2) -ret %res -} - -; ld3d -define @ld3.nxv6i64( %Pg, i64 *%addr, i64 %a) { -; CHECK-LABEL: ld3.nxv6i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3] -; CHECK-NEXT: ret -%addr2 = getelementptr i64, i64 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64( %Pg, i64 *%addr2) -ret %res -} - -define @ld3.nxv6f64( %Pg, double *%addr, i64 %a) { -; CHECK-LABEL: ld3.nxv6f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3] -; CHECK-NEXT: ret -%addr2 = getelementptr double, double * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64( %Pg, double *%addr2) -ret %res -} - -; ld4b -define @ld4.nxv64i8( %Pg, i8 *%addr, i64 %a) { -; CHECK-LABEL: ld4.nxv64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x1] -; CHECK-NEXT: ret -%addr2 = getelementptr i8, i8 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%addr2) -ret %res -} - -; ld4h -define @ld4.nxv32i16( %Pg, i16 *%addr, i64 %a) { -; CHECK-LABEL: ld4.nxv32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr i16, i16 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16( %Pg, i16 *%addr2) -ret %res -} - -define @ld4.nxv32f16( %Pg, half *%addr, i64 %a) { -; CHECK-LABEL: ld4.nxv32f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr half, half * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16( %Pg, half *%addr2) -ret %res -} - -define @ld4.nxv32bf16( %Pg, bfloat *%addr, i64 %a) #0 { -; CHECK-LABEL: ld4.nxv32bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] -; CHECK-NEXT: ret -%addr2 = getelementptr bfloat, bfloat * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16( %Pg, bfloat *%addr2) -ret %res -} - -; ld4w -define @ld4.nxv16i32( %Pg, i32 *%addr, i64 %a) { -; CHECK-LABEL: ld4.nxv16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2] -; CHECK-NEXT: ret -%addr2 = getelementptr i32, i32 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32( %Pg, i32 *%addr2) -ret %res -} - -define @ld4.nxv16f32( %Pg, float *%addr, i64 %a) { -; CHECK-LABEL: ld4.nxv16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2] -; CHECK-NEXT: ret -%addr2 = getelementptr float, float * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32( %Pg, float *%addr2) -ret %res -} - -; ld4d -define @ld4.nxv8i64( %Pg, i64 *%addr, i64 %a) { -; CHECK-LABEL: ld4.nxv8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3] -; CHECK-NEXT: ret -%addr2 = getelementptr i64, i64 * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64( %Pg, i64 *%addr2) -ret %res -} - -define @ld4.nxv8f64( %Pg, double *%addr, i64 %a) { -; CHECK-LABEL: ld4.nxv8f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3] -; CHECK-NEXT: ret -%addr2 = getelementptr double, double * %addr, i64 %a -%res = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64( %Pg, double *%addr2) -ret %res -} - -declare @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(, double*) - -declare @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(, double*) - -declare @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(, double*) - -; +bf16 is required for the bfloat version. -attributes #0 = { "target-features"="+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -307,270 +307,6 @@ ret %res } -; -; LD2B -; - -define @ld2b_i8( %pred, i8* %addr) { -; CHECK-LABEL: ld2b_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8( %pred, i8* %addr) - ret %res -} - -; -; LD2H -; - -define @ld2h_i16( %pred, i16* %addr) { -; CHECK-LABEL: ld2h_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16( %pred, i16* %addr) - ret %res -} - -define @ld2h_f16( %pred, half* %addr) { -; CHECK-LABEL: ld2h_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16( %pred, half* %addr) - ret %res -} - -define @ld2h_bf16( %pred, bfloat* %addr) { -; CHECK-LABEL: ld2h_bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16( %pred, bfloat* %addr) - ret %res -} - -; -; LD2W -; - -define @ld2w_i32( %pred, i32* %addr) { -; CHECK-LABEL: ld2w_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32( %pred, i32* %addr) - ret %res -} - -define @ld2w_f32( %pred, float* %addr) { -; CHECK-LABEL: ld2w_f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32( %pred, float* %addr) - ret %res -} - -; -; LD2D -; - -define @ld2d_i64( %pred, i64* %addr) { -; CHECK-LABEL: ld2d_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64( %pred, i64* %addr) - ret %res -} - -define @ld2d_f64( %pred, double* %addr) { -; CHECK-LABEL: ld2d_f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64( %pred, double* %addr) - ret %res -} - -; -; LD3B -; - -define @ld3b_i8( %pred, i8* %addr) { -; CHECK-LABEL: ld3b_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8( %pred, i8* %addr) - ret %res -} - -; -; LD3H -; - -define @ld3h_i16( %pred, i16* %addr) { -; CHECK-LABEL: ld3h_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16( %pred, i16* %addr) - ret %res -} - -define @ld3h_f16( %pred, half* %addr) { -; CHECK-LABEL: ld3h_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16( %pred, half* %addr) - ret %res -} - -define @ld3h_bf16( %pred, bfloat* %addr) { -; CHECK-LABEL: ld3h_bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16( %pred, bfloat* %addr) - ret %res -} - -; -; LD3W -; - -define @ld3w_i32( %pred, i32* %addr) { -; CHECK-LABEL: ld3w_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32( %pred, i32* %addr) - ret %res -} - -define @ld3w_f32( %pred, float* %addr) { -; CHECK-LABEL: ld3w_f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32( %pred, float* %addr) - ret %res -} - -; -; LD3D -; - -define @ld3d_i64( %pred, i64* %addr) { -; CHECK-LABEL: ld3d_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64( %pred, i64* %addr) - ret %res -} - -define @ld3d_f64( %pred, double* %addr) { -; CHECK-LABEL: ld3d_f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64( %pred, double* %addr) - ret %res -} - -; -; LD4B -; - -define @ld4b_i8( %pred, i8* %addr) { -; CHECK-LABEL: ld4b_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8( %pred, i8* %addr) - ret %res -} - -; -; LD4H -; - -define @ld4h_i16( %pred, i16* %addr) { -; CHECK-LABEL: ld4h_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16( %pred, i16* %addr) - ret %res -} - -define @ld4h_f16( %pred, half* %addr) { -; CHECK-LABEL: ld4h_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16( %pred, half* %addr) - ret %res -} - -define @ld4h_bf16( %pred, bfloat* %addr) { -; CHECK-LABEL: ld4h_bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16( %pred, bfloat* %addr) - ret %res -} - -; -; LD4W -; - -define @ld4w_i32( %pred, i32* %addr) { -; CHECK-LABEL: ld4w_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32( %pred, i32* %addr) - ret %res -} - -define @ld4w_f32( %pred, float* %addr) { -; CHECK-LABEL: ld4w_f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32( %pred, float* %addr) - ret %res -} - -; -; LD4D -; - -define @ld4d_i64( %pred, i64* %addr) { -; CHECK-LABEL: ld4d_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64( %pred, i64* %addr) - ret %res -} - -define @ld4d_f64( %pred, double* %addr) { -; CHECK-LABEL: ld4d_f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] -; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64( %pred, double* %addr) - ret %res -} - declare @llvm.aarch64.sve.ld1rq.nxv16i8(, i8*) declare @llvm.aarch64.sve.ld1rq.nxv8i16(, i16*) @@ -590,29 +326,3 @@ declare @llvm.aarch64.sve.ldnt1.nxv4f32(, float*) declare @llvm.aarch64.sve.ldnt1.nxv2f64(, double*) -declare @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(, double*) - -declare @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(, double*) - -declare @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(, i8*) -declare @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(, i16*) -declare @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(, i32*) -declare @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(, i64*) -declare @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(, half*) -declare @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(, bfloat*) -declare @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(, float*) -declare @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(, double*) diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll @@ -1,29 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s %complex = type { { double, double } } -; Function Attrs: argmemonly nounwind readonly -declare @llvm.vector.extract.nxv2f64.nxv4f64(, i64 immarg) #3 ; Function Attrs: argmemonly nounwind readonly -declare @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(, double*) #3 +declare { , } @llvm.aarch64.sve.ld2.sret.nxv2f64(, double*) #3 ; Function Attrs: nounwind readnone declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) #2 define void @foo1(%complex* %outval, %pred, double *%inptr) { ; CHECK-LABEL: foo1: -; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1] -; CHECK-NEXT: faddv d2, p0, z0.d -; CHECK-NEXT: faddv d0, p0, z1.d -; CHECK-NEXT: mov v2.d[1], v0.d[0] -; CHECK-NEXT: str q2, [x0] +; CHECK: // %bb.0: +; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x1] +; CHECK-NEXT: faddv d2, p0, z0.d +; CHECK-NEXT: faddv d0, p0, z1.d +; CHECK-NEXT: mov v2.d[1], v0.d[0] +; CHECK-NEXT: str q2, [x0] +; CHECK-NEXT: ret %realp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 0 %imagp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 1 - %1 = call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( %pred, double* nonnull %inptr) - %2 = call @llvm.vector.extract.nxv2f64.nxv4f64( %1, i64 0) + %1 = call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( %pred, double* nonnull %inptr) + %2 = extractvalue { , } %1, 0 %3 = call double @llvm.aarch64.sve.faddv.nxv2f64( %pred, %2) - %4 = call @llvm.vector.extract.nxv2f64.nxv4f64( %1, i64 2) + %4 = extractvalue { , }%1, 1 %5 = call double @llvm.aarch64.sve.faddv.nxv2f64( %pred, %4) store double %3, double* %realp, align 8 store double %5, double* %imagp, align 8