diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -512,14 +512,14 @@
 }

 // Load N-element structure into N vectors (scalar base)
-defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2">;
-defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3">;
-defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4">;
+defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">;
+defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">;
+defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">;

 // Load N-element structure into N vectors (scalar base, VL displacement)
-defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2">;
-defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3">;
-defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4">;
+defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">;
+defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">;
+defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">;

 // Load one octoword and replicate (scalar base)
 let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8849,13 +8849,13 @@
   unsigned N;
   switch (IntID) {
-  case Intrinsic::aarch64_sve_ld2:
+  case Intrinsic::aarch64_sve_ld2_sret:
     N = 2;
     break;
-  case Intrinsic::aarch64_sve_ld3:
+  case Intrinsic::aarch64_sve_ld3_sret:
     N = 3;
     break;
-  case Intrinsic::aarch64_sve_ld4:
+  case Intrinsic::aarch64_sve_ld4_sret:
     N = 4;
     break;
   default:
@@ -8869,9 +8869,16 @@
   Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
   BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
   BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
-
-  Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
-  return Builder.CreateCall(F, { Predicate, BasePtr });
+  Function *F = CGM.getIntrinsic(IntID, {VTy});
+  Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
+  unsigned MinElts = VTy->getMinNumElements();
+  Value *Ret = llvm::PoisonValue::get(RetTy);
+  for (unsigned I = 0; I < N; I++) {
+    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
+    Value *SRet = Builder.CreateExtractValue(Call, I);
+    Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
+  }
+  return Ret;
 }

 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
@@ -18,14 +18,22 @@
 // CHECK-LABEL: @test_svld2_bf16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
-// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld2_bf16u10__SVBool_tPKu6__bf16(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
-// CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP5]]
 //
 svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base)
 {
@@ -38,16 +46,24 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP3]]
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP4]], i64 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP7:%.*]] = tail
call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z20test_svld2_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c @@ -17,13 +17,21 @@ // CHECK-LABEL: @test_svld2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z13test_svld2_s8u10__SVBool_tPKa( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] // svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base) { @@ -33,14 +41,22 @@ // CHECK-LABEL: @test_svld2_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] // svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base) { @@ -50,14 +66,22 @@ // CHECK-LABEL: @test_svld2_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] // svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base) { @@ -67,14 +91,22 @@ // CHECK-LABEL: @test_svld2_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: 
@_Z14test_svld2_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] // svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base) { @@ -83,13 +115,21 @@ // CHECK-LABEL: @test_svld2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z13test_svld2_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] // svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base) { @@ -99,14 +139,22 @@ // CHECK-LABEL: @test_svld2_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } 
@llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] // svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base) { @@ -116,14 +164,22 @@ // CHECK-LABEL: @test_svld2_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] // svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base) { @@ -133,14 +189,22 @@ // CHECK-LABEL: @test_svld2_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] // svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base) { @@ -150,14 +214,22 @@ // CHECK-LABEL: @test_svld2_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] // svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base) { @@ -167,14 +239,22 @@ // CHECK-LABEL: @test_svld2_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail 
call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] // svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base) { @@ -184,14 +264,22 @@ // CHECK-LABEL: @test_svld2_f64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] // svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) { @@ -202,15 +290,23 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z18test_svld2_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] // svint8x2_t 
test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) { @@ -222,16 +318,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) { @@ -243,16 +347,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) { @@ -264,16 +376,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) { @@ -284,15 +404,23 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z18test_svld2_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] // svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) { @@ -304,16 +432,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) { @@ -325,16 +461,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) { @@ -346,16 +490,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) { @@ -367,16 +519,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = 
tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) { @@ -388,16 +548,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) { @@ -409,16 +577,24 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] 
= tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c @@ -18,14 +18,26 @@ // CHECK-LABEL: @test_svld3_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z15test_svld3_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base) { @@ -37,16 +49,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z20test_svld3_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c @@ -17,13 +17,25 @@ // CHECK-LABEL: @test_svld3_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z13test_svld3_s8u10__SVBool_tPKa( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP6]] // svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base) { @@ -33,14 +45,26 @@ // CHECK-LABEL: @test_svld3_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base) { @@ -50,14 +74,26 @@ // CHECK-LABEL: @test_svld3_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], 
i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base) { @@ -67,14 +103,26 @@ // CHECK-LABEL: @test_svld3_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base) { @@ -83,13 +131,25 @@ // CHECK-LABEL: @test_svld3_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: ret [[TMP6]] // // CPP-CHECK-LABEL: @_Z13test_svld3_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP6]] // svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base) { @@ -99,14 +159,26 @@ // CHECK-LABEL: @test_svld3_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base) { @@ -116,14 +188,26 @@ // CHECK-LABEL: @test_svld3_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base) { @@ -133,14 +217,26 @@ // CHECK-LABEL: @test_svld3_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call 
@llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base) { @@ -150,14 +246,26 @@ // CHECK-LABEL: @test_svld3_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base) { @@ -167,14 +275,26 @@ // CHECK-LABEL: @test_svld3_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base) { @@ -184,14 +304,26 @@ // CHECK-LABEL: @test_svld3_f64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: ret [[TMP7]] // // CPP-CHECK-LABEL: @_Z14test_svld3_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = 
extractvalue { , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP7]] // svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) { @@ -202,15 +334,27 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z18test_svld3_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP8]] // svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) { @@ -222,16 +366,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) { @@ -243,16 +399,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) { @@ -264,16 +432,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) { @@ -284,15 +464,27 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z18test_svld3_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , 
} @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CPP-CHECK-NEXT: ret [[TMP8]] // svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) { @@ -304,16 +496,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1( [[TMP0]], i16* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], i16* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) { @@ -325,16 +529,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1( [[TMP0]], i32* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], i32* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) { @@ -346,16 +562,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1( [[TMP0]], i64* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], i64* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( 
poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) { @@ -367,16 +595,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1( [[TMP0]], half* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], half* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) { @@ -388,16 +628,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1( [[TMP0]], float* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], float* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) { @@ -409,16 +661,28 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1( [[TMP0]], double* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], double* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( 
[[TMP5]], [[TMP6]], i64 2) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP9]] // svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c @@ -18,14 +18,30 @@ // CHECK-LABEL: @test_svld4_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z15test_svld4_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] // svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base) { @@ -37,16 +53,32 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP9]], [[TMP10]], i64 24) +// CHECK-NEXT: ret [[TMP11]] // // CPP-CHECK-LABEL: @_Z20test_svld4_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to * // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1( [[TMP0]], bfloat* [[TMP2]]) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], bfloat* [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP4]], i64 0) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 16) +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP9]], [[TMP10]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP11]] // svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c @@ -17,13 +17,29 @@ // CHECK-LABEL: @test_svld4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// 
CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z13test_svld4_s8u10__SVBool_tPKa( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] // svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base) { @@ -33,14 +49,30 @@ // CHECK-LABEL: @test_svld4_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] // 
svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base) { @@ -50,14 +82,30 @@ // CHECK-LABEL: @test_svld4_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base) { @@ -67,14 +115,30 @@ // CHECK-LABEL: @test_svld4_s64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] // svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base) { @@ -83,13 +147,29 @@ // CHECK-LABEL: @test_svld4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z13test_svld4_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] // svuint8x4_t test_svld4_u8(svbool_t pg, const 
uint8_t *base) { @@ -99,14 +179,30 @@ // CHECK-LABEL: @test_svld4_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] // // CPP-CHECK-LABEL: @_Z14test_svld4_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] // svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base) { @@ -116,14 +212,30 @@ // CHECK-LABEL: @test_svld4_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) 
+// CHECK-NEXT: ret [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z14test_svld4_u32u10__SVBool_tPKj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[BASE:%.*]])
-// CPP-CHECK-NEXT: ret [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12)
+// CPP-CHECK-NEXT: ret [[TMP9]]
//
svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base) {
@@ -133,14 +245,30 @@
// CHECK-LABEL: @test_svld4_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]])
-// CHECK-NEXT: ret [[TMP1]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6)
+// CHECK-NEXT: ret [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z14test_svld4_u64u10__SVBool_tPKm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[BASE:%.*]])
-// CPP-CHECK-NEXT: ret [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6)
+// CPP-CHECK-NEXT: ret [[TMP9]]
//
svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base) {
@@ -150,14 +278,30 @@
// CHECK-LABEL: @test_svld4_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]])
-// CHECK-NEXT: ret [[TMP1]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24)
+// CHECK-NEXT: ret [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z14test_svld4_f16u10__SVBool_tPKDh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[BASE:%.*]])
-// CPP-CHECK-NEXT: ret [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24)
+// CPP-CHECK-NEXT: ret [[TMP9]]
//
svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base) {
@@ -167,14 +311,30 @@
// CHECK-LABEL: @test_svld4_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]])
-// CHECK-NEXT: ret [[TMP1]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12)
+// CHECK-NEXT: ret [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z14test_svld4_f32u10__SVBool_tPKf(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[BASE:%.*]])
-// CPP-CHECK-NEXT: ret [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12)
+// CPP-CHECK-NEXT: ret [[TMP9]]
//
svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base) {
@@ -184,14 +344,30 @@
// CHECK-LABEL: @test_svld4_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]])
-// CHECK-NEXT: ret [[TMP1]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6)
+// CHECK-NEXT: ret [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z14test_svld4_f64u10__SVBool_tPKd(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[BASE:%.*]])
-// CPP-CHECK-NEXT: ret [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6)
+// CPP-CHECK-NEXT: ret [[TMP9]]
//
svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) {
@@ -202,15 +378,31 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT: ret [[TMP2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2
+// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32)
+// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3
+// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48)
+// CHECK-NEXT: ret [[TMP10]]
//
// CPP-CHECK-LABEL: @_Z18test_svld4_vnum_s8u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT: ret [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32)
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48)
+// CPP-CHECK-NEXT: ret [[TMP10]]
//
svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) {
@@ -222,16 +414,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s16u10__SVBool_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) {
@@ -243,16 +451,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s32u10__SVBool_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) {
@@ -264,16 +488,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s64u10__SVBool_tPKll(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) {
@@ -284,15 +524,31 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT: ret [[TMP2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2
+// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32)
+// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3
+// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48)
+// CHECK-NEXT: ret [[TMP10]]
//
// CPP-CHECK-LABEL: @_Z18test_svld4_vnum_u8u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1( [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT: ret [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32)
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48)
+// CPP-CHECK-NEXT: ret [[TMP10]]
//
svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) {
@@ -304,16 +560,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u16u10__SVBool_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1( [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) {
@@ -325,16 +597,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u32u10__SVBool_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1( [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) {
@@ -346,16 +634,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u64u10__SVBool_tPKml(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1( [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) {
@@ -367,16 +671,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f16u10__SVBool_tPKDhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1( [[TMP0]], half* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], half* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) {
@@ -388,16 +708,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f32u10__SVBool_tPKfl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1( [[TMP0]], float* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], float* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) {
@@ -409,16 +745,32 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to *
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[TMP2]])
-// CHECK-NEXT: ret [[TMP3]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP4]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2)
+// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4)
+// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP9]], [[TMP10]], i64 6)
+// CHECK-NEXT: ret [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f64u10__SVBool_tPKdl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to *
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , * [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1( [[TMP0]], double* [[TMP2]])
-// CPP-CHECK-NEXT: ret [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], double* [[TMP2]])
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP4]], i64 0)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2)
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4)
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP9]], [[TMP10]], i64 6)
+// CPP-CHECK-NEXT: ret [[TMP11]]
//
svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) {