diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 2 %base_ptr = bitcast * %base to i8* - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , } %res } @@ -29,7 +29,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -16 %base_ptr = bitcast * %base to i8 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , } %res } @@ -41,7 +41,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 14 %base_ptr = bitcast * %base to i8 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , } %res } @@ -53,7 +53,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 3 %base_ptr = bitcast * %base to i8 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , } %res } @@ -65,7 +65,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -18 %base_ptr = bitcast * %base to i8 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , } %res } @@ -77,7 +77,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 16 %base_ptr = bitcast * %base to i8 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , } %res } @@ -90,7 +90,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 14 %base_ptr = bitcast * %base to i16 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i16.nxv8i1.p0i16( %Pg, i16 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( %Pg, i16 *%base_ptr) ret { , } %res } @@ -102,7 +102,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -16 %base_ptr = bitcast * %base to half * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16f16.nxv8i1.p0f16( %Pg, half *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( %Pg, half *%base_ptr) ret { , } %res } @@ -114,7 +114,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 12 %base_ptr = bitcast * %base to bfloat * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16bf16.nxv8i1.p0bf16( %Pg, bfloat *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( %Pg, bfloat *%base_ptr) ret { , } %res } @@ -127,7 +127,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 14 %base_ptr = bitcast * %base to i32 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8i32.nxv4i1.p0i32( %Pg, i32 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( %Pg, i32 *%base_ptr) ret { , } %res } @@ -139,7 +139,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -16 %base_ptr = bitcast * %base to float * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8f32.nxv4i1.p0f32( %Pg, float *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( 
%Pg, float *%base_ptr) ret { , } %res } @@ -152,7 +152,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 14 %base_ptr = bitcast * %base to i64 * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i64.nxv2i1.p0i64( %Pg, i64 *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( %Pg, i64 *%base_ptr) ret { , } %res } @@ -164,7 +164,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -16 %base_ptr = bitcast * %base to double * - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4f64.nxv2i1.p0f64( %Pg, double *%base_ptr) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( %Pg, double *%base_ptr) ret { , } %res } @@ -177,7 +177,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 3 %base_ptr = bitcast * %base to i8 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , } %res } @@ -189,7 +189,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to i8 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , } %res } @@ -201,7 +201,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i8 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , } %res } @@ -213,7 +213,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 4 %base_ptr = bitcast * %base to i8 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , } %res } @@ -225,7 +225,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 5 %base_ptr = bitcast * %base to i8 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , } %res } @@ -237,7 +237,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -27 %base_ptr = bitcast * %base to i8 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , } %res } @@ -249,7 +249,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 24 %base_ptr = bitcast * %base to i8 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , } %res } @@ -262,7 +262,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i16 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv24i16.nxv8i1.p0i16( %Pg, i16 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( %Pg, i16 *%base_ptr) ret { , , } %res } @@ -274,7 +274,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to half * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv24f16.nxv8i1.p0f16( %Pg, half *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( %Pg, half *%base_ptr) ret { , , } %res } @@ -286,7 +286,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to bfloat * - %res = call { , , } 
@llvm.aarch64.sve.ld3.sret.nxv24bf16.nxv8i1.p0bf16( %Pg, bfloat *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( %Pg, bfloat *%base_ptr) ret { , , } %res } @@ -299,7 +299,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i32 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv12i32.nxv4i1.p0i32( %Pg, i32 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( %Pg, i32 *%base_ptr) ret { , , } %res } @@ -311,7 +311,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to float * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv12f32.nxv4i1.p0f32( %Pg, float *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( %Pg, float *%base_ptr) ret { , , } %res } @@ -324,7 +324,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i64 * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv6i64.nxv2i1.p0i64( %Pg, i64 *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( %Pg, i64 *%base_ptr) ret { , , } %res } @@ -336,7 +336,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to double * - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv6f64.nxv2i1.p0f64( %Pg, double *%base_ptr) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( %Pg, double *%base_ptr) ret { , , } %res } @@ -349,7 +349,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 4 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -361,7 +361,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -373,7 +373,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -385,7 +385,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 5 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -397,7 +397,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 6 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -409,7 +409,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 7 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -428,7 +428,7 @@ ; xOFFSET = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36 %base = getelementptr , * %addr, i64 -36 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } 
@llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -447,7 +447,7 @@ ; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32 %base = getelementptr , * %addr, i64 32 %base_ptr = bitcast * %base to i8 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%base_ptr) ret { , , , } %res } @@ -460,7 +460,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 8 %base_ptr = bitcast * %base to i16 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv32i16.nxv8i1.p0i16( %Pg, i16 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( %Pg, i16 *%base_ptr) ret { , , , } %res } @@ -472,7 +472,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to half * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv32f16.nxv8i1.p0f16( %Pg, half *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( %Pg, half *%base_ptr) ret { , , , } %res } @@ -484,7 +484,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to bfloat * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv32bf16.nxv8i1.p0bf16( %Pg, bfloat *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( %Pg, bfloat *%base_ptr) ret { , , , } %res } @@ -497,7 +497,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to i32 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i32.nxv4i1.p0i32( %Pg, i32 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( %Pg, i32 *%base_ptr) ret { , , , } %res } @@ -509,7 +509,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to float * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16f32.nxv4i1.p0f32( %Pg, float *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( %Pg, float *%base_ptr) ret { , , , } %res } @@ -522,7 +522,7 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to i64 * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i64.nxv2i1.p0i64( %Pg, i64 *%base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( %Pg, i64 *%base_ptr) ret { , , , } %res } @@ -534,36 +534,36 @@ ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to double * - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f64.nxv2i1.p0f64( %Pg, double * %base_ptr) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %Pg, double * %base_ptr) ret { , , , } %res } -declare { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8(, i8*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv16i16.nxv8i1.p0i16(, i16*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv8i32.nxv4i1.p0i32(, i32*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv4i64.nxv2i1.p0i64(, i64*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv16f16.nxv8i1.p0f16(, half*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv16bf16.nxv8i1.p0bf16(, bfloat*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv8f32.nxv4i1.p0f32(, float*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv4f64.nxv2i1.p0f64(, double*) - -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8(, i8*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv24i16.nxv8i1.p0i16(, i16*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv12i32.nxv4i1.p0i32(, i32*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv6i64.nxv2i1.p0i64(, i64*) -declare { , , } 
@llvm.aarch64.sve.ld3.sret.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
-declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv24bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
-declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
-declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
-
-declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
-declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
-declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
-declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
-declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
-declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
-declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
-declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1>, i8*)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1>, i16*)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1>, i32*)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1>, i64*)
+declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1>, half*)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1>, bfloat*)
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1>, float*)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1>, double*)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1>, i8*)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1>, i16*)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1>, i32*)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1>, i64*)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1>, half*)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1>, bfloat*)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1>, float*)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1>, double*)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1>, i8*)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1>, i16*)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1>, i32*)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1>, i64*)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1>, half*)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1>, bfloat*)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1>, float*)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1>, double*)
 
 ; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x1] ; CHECK-NEXT: ret %addr2 = getelementptr i8, i8 * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8( %Pg, i8 *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( %Pg, i8 *%addr2) ret { , } %res } @@ -20,7 +20,7 @@ ; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr i16, i16 * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i16.nxv8i1.p0i16( %Pg, i16 *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( %Pg, i16 *%addr2) ret { , } %res } @@ -30,7 +30,7 @@ ; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr half, half * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16f16.nxv8i1.p0f16( %Pg, half *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( %Pg, half *%addr2) ret { , } %res } @@ -40,7 +40,7 @@ ; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv16bf16.nxv8i1.p0bf16( %Pg, bfloat *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( %Pg, bfloat *%addr2) ret { , } %res } @@ -51,7 +51,7 @@ ; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr i32, i32 * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8i32.nxv4i1.p0i32( %Pg, i32 *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( %Pg, i32 *%addr2) ret { , } %res } @@ -61,7 +61,7 @@ ; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr float, float * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv8f32.nxv4i1.p0f32( %Pg, float *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( %Pg, float *%addr2) ret { , } %res } @@ -72,7 +72,7 @@ ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr i64, i64 * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i64.nxv2i1.p0i64( %Pg, i64 *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( %Pg, i64 *%addr2) ret { , } %res } @@ -82,7 +82,7 @@ ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr double, double * %addr, i64 %a - %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv4f64.nxv2i1.p0f64( %Pg, double *%addr2) + %res = call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( %Pg, double *%addr2) ret { , } %res } @@ -93,7 +93,7 @@ ; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x1] ; CHECK-NEXT: ret %addr2 = getelementptr i8, i8 * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8( %Pg, i8 *%addr2) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%addr2) ret { , , } %res } @@ -104,7 +104,7 @@ ; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr i16, i16 * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv24i16.nxv8i1.p0i16( %Pg, i16 *%addr2) + %res = call { , , } 
@llvm.aarch64.sve.ld3.sret.nxv8i16( %Pg, i16 *%addr2) ret { , , } %res } @@ -114,7 +114,7 @@ ; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr half, half * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv24f16.nxv8i1.p0f16( %Pg, half *%addr2) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( %Pg, half *%addr2) ret { , , } %res } @@ -124,7 +124,7 @@ ; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv24bf16.nxv8i1.p0bf16( %Pg, bfloat *%addr2) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( %Pg, bfloat *%addr2) ret { , , } %res } @@ -135,7 +135,7 @@ ; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr i32, i32 * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv12i32.nxv4i1.p0i32( %Pg, i32 *%addr2) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( %Pg, i32 *%addr2) ret { , , } %res } @@ -145,7 +145,7 @@ ; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr float, float * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv12f32.nxv4i1.p0f32( %Pg, float *%addr2) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( %Pg, float *%addr2) ret { , , } %res } @@ -156,7 +156,7 @@ ; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr i64, i64 * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv6i64.nxv2i1.p0i64( %Pg, i64 *%addr2) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( %Pg, i64 *%addr2) ret { , , } %res } @@ -166,7 +166,7 @@ ; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr double, double * %addr, i64 %a - %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv6f64.nxv2i1.p0f64( %Pg, double *%addr2) + %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( %Pg, double *%addr2) ret { , , } %res } @@ -177,7 +177,7 @@ ; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x1] ; CHECK-NEXT: ret %addr2 = getelementptr i8, i8 * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8( %Pg, i8 *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%addr2) ret { , , , } %res } @@ -188,7 +188,7 @@ ; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr i16, i16 * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv32i16.nxv8i1.p0i16( %Pg, i16 *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( %Pg, i16 *%addr2) ret { , , , } %res } @@ -198,7 +198,7 @@ ; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr half, half * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv32f16.nxv8i1.p0f16( %Pg, half *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( %Pg, half *%addr2) ret { , , , } %res } @@ -208,7 +208,7 @@ ; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv32bf16.nxv8i1.p0bf16( %Pg, bfloat *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( %Pg, bfloat *%addr2) ret { , , , } %res } @@ -219,7 +219,7 @@ ; CHECK-NEXT: ld4w { 
z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr i32, i32 * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i32.nxv4i1.p0i32( %Pg, i32 *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( %Pg, i32 *%addr2) ret { , , , } %res } @@ -229,7 +229,7 @@ ; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr float, float * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16f32.nxv4i1.p0f32( %Pg, float *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( %Pg, float *%addr2) ret { , , , } %res } @@ -240,7 +240,7 @@ ; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr i64, i64 * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i64.nxv2i1.p0i64( %Pg, i64 *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( %Pg, i64 *%addr2) ret { , , , } %res } @@ -250,36 +250,36 @@ ; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr double, double * %addr, i64 %a - %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f64.nxv2i1.p0f64( %Pg, double *%addr2) + %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %Pg, double *%addr2) ret { , , , } %res } -declare { , } @llvm.aarch64.sve.ld2.sret.nxv32i8.nxv16i1.p0i8(, i8*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv16i16.nxv8i1.p0i16(, i16*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv8i32.nxv4i1.p0i32(, i32*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv4i64.nxv2i1.p0i64(, i64*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv16f16.nxv8i1.p0f16(, half*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv16bf16.nxv8i1.p0bf16(, bfloat*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv8f32.nxv4i1.p0f32(, float*) -declare { , } @llvm.aarch64.sve.ld2.sret.nxv4f64.nxv2i1.p0f64(, double*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv16i8(, i8*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv8i16(, i16*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv4i32(, i32*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv2i64(, i64*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv8f16(, half*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16(, bfloat*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv4f32(, float*) +declare { , } @llvm.aarch64.sve.ld2.sret.nxv2f64(, double*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv48i8.nxv16i1.p0i8(, i8*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv24i16.nxv8i1.p0i16(, i16*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv12i32.nxv4i1.p0i32(, i32*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv6i64.nxv2i1.p0i64(, i64*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv24f16.nxv8i1.p0f16(, half*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv24bf16.nxv8i1.p0bf16(, bfloat*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv12f32.nxv4i1.p0f32(, float*) -declare { , , } @llvm.aarch64.sve.ld3.sret.nxv6f64.nxv2i1.p0f64(, double*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8(, i8*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16(, i16*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32(, i32*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64(, i64*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16(, half*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16(, bfloat*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32(, float*) +declare { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64(, double*) -declare { , , , } 
@llvm.aarch64.sve.ld4.sret.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
-declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
-declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
-declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
-declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
-declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
-declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
-declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1>, i8*)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1>, i16*)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1>, i32*)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1>, i64*)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1>, half*)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1>, bfloat*)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1>, float*)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1>, double*)
 
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+bf16" }
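
; A minimal standalone sketch of the naming scheme exercised by the tests
; above, assuming a hypothetical wrapper function @ld2_example (not part of
; the patch): the ld2/ld3/ld4 sret intrinsics are mangled only by the
; returned data vector type (e.g. nxv16i8), and the predicate and pointer
; types no longer appear in the intrinsic name.
declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1>, i8*)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2_example(<vscale x 16 x i1> %pg, i8* %addr) {
  ; Structure-load two consecutive vectors of bytes under predicate %pg.
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> %pg, i8* %addr)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}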