diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1192,6 +1192,10 @@
 def int_aarch64_sve_sub        : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_subr       : AdvSIMD_Pred2VectorArg_Intrinsic;
 
+def int_aarch64_sve_pmul       : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_sqdmulh    : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_sqrdmulh   : AdvSIMD_2VectorArg_Intrinsic;
+
 def int_aarch64_sve_mul        : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_smulh      : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_umulh      : AdvSIMD_Pred2VectorArg_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -158,11 +158,21 @@
   defm UMAX_ZI   : sve_int_arith_imm1_unsigned<0b01, "umax", umax>;
   defm UMIN_ZI   : sve_int_arith_imm1_unsigned<0b11, "umin", umin>;
 
-  defm MUL_ZI   : sve_int_arith_imm2<"mul", mul>;
-  defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
+  defm MUL_ZI     : sve_int_arith_imm2<"mul", mul>;
+  defm MUL_ZPmZ   : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
   defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>;
   defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>;
 
+  // Add unpredicated alternative for the mul instruction.
+  def : Pat<(mul nxv16i8:$Op1, nxv16i8:$Op2),
+            (MUL_ZPmZ_B (PTRUE_B 31), $Op1, $Op2)>;
+  def : Pat<(mul nxv8i16:$Op1, nxv8i16:$Op2),
+            (MUL_ZPmZ_H (PTRUE_H 31), $Op1, $Op2)>;
+  def : Pat<(mul nxv4i32:$Op1, nxv4i32:$Op2),
+            (MUL_ZPmZ_S (PTRUE_S 31), $Op1, $Op2)>;
+  def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
+            (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
+
   defm SDIV_ZPmZ  : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>;
   defm UDIV_ZPmZ  : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>;
   defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
@@ -1405,15 +1415,32 @@
   defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh">;
 
   // SVE2 signed saturating doubling multiply high (unpredicated)
-  defm SQDMULH_ZZZ  : sve2_int_mul<0b100, "sqdmulh">;
-  defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh">;
+  defm SQDMULH_ZZZ  : sve2_int_mul<0b100, "sqdmulh",  int_aarch64_sve_sqdmulh>;
+  defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>;
 
   // SVE2 integer multiply vectors (unpredicated)
-  defm MUL_ZZZ    : sve2_int_mul<0b000, "mul">;
-  defm SMULH_ZZZ  : sve2_int_mul<0b010, "smulh">;
-  defm UMULH_ZZZ  : sve2_int_mul<0b011, "umulh">;
-  def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
-
+  defm MUL_ZZZ   : sve2_int_mul<0b000, "mul", mul>;
+  defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
+  defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
+  defm PMUL_ZZZ  : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
+
+  // Add patterns for unpredicated version of smulh and umulh.
+  def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
+            (SMULH_ZZZ_B $Op1, $Op2)>;
+  def : Pat<(nxv8i16 (int_aarch64_sve_smulh (nxv8i1 (AArch64ptrue 31)), nxv8i16:$Op1, nxv8i16:$Op2)),
+            (SMULH_ZZZ_H $Op1, $Op2)>;
+  def : Pat<(nxv4i32 (int_aarch64_sve_smulh (nxv4i1 (AArch64ptrue 31)), nxv4i32:$Op1, nxv4i32:$Op2)),
+            (SMULH_ZZZ_S $Op1, $Op2)>;
+  def : Pat<(nxv2i64 (int_aarch64_sve_smulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
+            (SMULH_ZZZ_D $Op1, $Op2)>;
+  def : Pat<(nxv16i8 (int_aarch64_sve_umulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
+            (UMULH_ZZZ_B $Op1, $Op2)>;
+  def : Pat<(nxv8i16 (int_aarch64_sve_umulh (nxv8i1 (AArch64ptrue 31)), nxv8i16:$Op1, nxv8i16:$Op2)),
+            (UMULH_ZZZ_H $Op1, $Op2)>;
+  def : Pat<(nxv4i32 (int_aarch64_sve_umulh (nxv4i1 (AArch64ptrue 31)), nxv4i32:$Op1, nxv4i32:$Op2)),
+            (UMULH_ZZZ_S $Op1, $Op2)>;
+  def : Pat<(nxv2i64 (int_aarch64_sve_umulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
+            (UMULH_ZZZ_D $Op1, $Op2)>;
 
   // SVE2 complex integer dot product (indexed)
   defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2621,11 +2621,22 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve2_int_mul<bits<3> opc, string asm> {
+multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
   def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
   def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
   def _D : sve2_int_mul<0b11, opc, asm, ZPR64>;
+
+  def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
+  def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
+
+  def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
--- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
@@ -446,3 +446,39 @@
   %res = mul <vscale x 2 x i64> %a, %splat
   ret <vscale x 2 x i64> %res
 }
+
+define <vscale x 8 x i16> @mul_i16_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: mul_i16_range
+; CHECK: mov w[[W:[0-9]+]], #255
+; CHECK-NEXT: mov z1.h, w[[W]]
+; CHECK: ptrue p0.h
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+  %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = mul <vscale x 8 x i16> %a, %splat
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @mul_i32_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: mul_i32_range
+; CHECK: mov w[[W:[0-9]+]], #255
+; CHECK-NEXT: mov z1.s, w[[W]]
+; CHECK: ptrue p0.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+  %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = mul <vscale x 4 x i32> %a, %splat
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @mul_i64_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: mul_i64_range
+; CHECK: mov w[[W:[0-9]+]], #255
+; CHECK-NEXT: mov z1.d, x[[W]]
+; CHECK: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = mul <vscale x 2 x i64> %a, %splat
+  ret <vscale x 2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll
--- a/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-mul-pred.ll
@@ -45,8 +45,8 @@
 ; CHECK: smulh z0.b, p0/m, z0.b, z1.b
 ; CHECK-NEXT: ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -55,8 +55,8 @@
 ; CHECK: smulh z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -65,8 +65,8 @@
 ; CHECK: smulh z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -75,8 +75,8 @@
 ; CHECK: smulh z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -85,8 +85,8 @@
 ; CHECK: umulh z0.b, p0/m, z0.b, z1.b
 ; CHECK-NEXT: ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -95,8 +95,8 @@
 ; CHECK: umulh z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -105,8 +105,8 @@
 ; CHECK: umulh z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -115,8 +115,8 @@
 ; CHECK: umulh z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm-2.ll b/llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm-2.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm-2.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: not llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s
-
-; Numbers smaller than -127 and greater than or equal to 127 are not allowed.
-; This should get lowered to a regular vector multiply and these tests should
-; be updated when those patterns are added.
-
-define <vscale x 2 x i64> @mul_i64_neg_1(<vscale x 2 x i64> %a) {
-  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
-  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %res = mul <vscale x 2 x i64> %a, %splat
-  ret <vscale x 2 x i64> %res
-}
diff --git a/llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/AArch64/sve-neg-int-arith-imm.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: not llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s
-
-; Numbers smaller than -127 and greater than or equal to 127 are not allowed for imm mul.
-; This should get lowered to a regular vector multiply and these tests should
-; be updated when those patterns are added.
-define <vscale x 2 x i64> @mul_i64_neg_1(<vscale x 2 x i64> %a) {
-  %elt = insertelement <vscale x 2 x i64> undef, i64 -130, i32 0
-  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %res = mul <vscale x 2 x i64> %a, %splat
-  ret <vscale x 2 x i64> %res
-}
diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
@@ -0,0 +1,324 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; MUL with SPLAT
+;
+define <vscale x 8 x i16> @mul_i16_imm(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: mul_i16_imm
+; CHECK: mov w[[W:[0-9]+]], #255
+; CHECK-NEXT: mov z1.h, w[[W]]
+; CHECK-NEXT: mul z0.h, z0.h, z1.h
+  %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = mul <vscale x 8 x i16> %a, %splat
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @mul_i16_imm_neg(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: mul_i16_imm_neg
+; CHECK: mov w[[W:[0-9]+]], #-200
+; CHECK-NEXT: mov z1.h, w[[W]]
+; CHECK-NEXT: mul z0.h, z0.h, z1.h
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -200, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = mul <vscale x 8 x i16> %a, %splat
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @mul_i32_imm(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: mul_i32_imm
+; CHECK: mov w[[W:[0-9]+]], #255
+; CHECK-NEXT: mov z1.s, w[[W]]
+; CHECK-NEXT: mul z0.s, z0.s, z1.s
+  %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = mul <vscale x 4 x i32> %a, %splat
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: mul_i32_imm_neg
+; CHECK: mov w[[W:[0-9]+]], #-200
+; CHECK-NEXT: mov z1.s, w[[W]]
+; CHECK-NEXT: mul z0.s, z0.s, z1.s
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -200, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = mul <vscale x 4 x i32> %a, %splat
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: mul_i64_imm
+; CHECK: mov w[[X:[0-9]+]], #255
+; CHECK-NEXT: mov z1.d, x[[X]]
+; CHECK-NEXT: mul z0.d, z0.d, z1.d
+  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = mul <vscale x 2 x i64> %a, %splat
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @mul_i64_imm_neg(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: mul_i64_imm_neg
+; CHECK: mov x[[X:[0-9]+]], #-200
+; CHECK-NEXT: mov z1.d, x[[X]]
+; CHECK-NEXT: mul z0.d, z0.d, z1.d
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -200, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = mul <vscale x 2 x i64> %a, %splat
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; MUL (vector, unpredicated)
+;
+define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a,
+                                  <vscale x 16 x i8> %b) {
+; CHECK-LABEL: mul_i8
+; CHECK: mul z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %res = mul <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a,
+                                   <vscale x 8 x i16> %b) {
+; CHECK-LABEL: mul_i16
+; CHECK: mul z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = mul <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a,
+                                   <vscale x 4 x i32> %b) {
+; CHECK-LABEL: mul_i32
+; CHECK: mul z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = mul <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a,
+                                   <vscale x 2 x i64> %b) {
+; CHECK-LABEL: mul_i64
+; CHECK: mul z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = mul <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; SMULH (vector, unpredicated)
+;
+define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a,
+                                    <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smulh_i8
+; CHECK: smulh z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a,
+                                     <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smulh_i16
+; CHECK: smulh z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a,
+                                     <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smulh_i32
+; CHECK: smulh z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a,
+                                     <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smulh_i64
+; CHECK: smulh z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; UMULH (vector, unpredicated)
+;
+define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a,
+                                    <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umulh_i8
+; CHECK: umulh z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a,
+                                     <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umulh_i16
+; CHECK: umulh z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a,
+                                     <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umulh_i32
+; CHECK: umulh z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a,
+                                     <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umulh_i64
+; CHECK: umulh z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; PMUL (vector, unpredicated)
+;
+define <vscale x 16 x i8> @pmul_i8(<vscale x 16 x i8> %a,
+                                   <vscale x 16 x i8> %b) {
+; CHECK-LABEL: pmul_i8
+; CHECK: pmul z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %res
+}
+
+;
+; SQDMULH (vector, unpredicated)
+;
+define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a,
+                                      <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqdmulh_i8
+; CHECK: sqdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a,
+                                       <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqdmulh_i16
+; CHECK: sqdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a,
+                                       <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqdmulh_i32
+; CHECK: sqdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a,
+                                       <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqdmulh_i64
+; CHECK: sqdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; SQRDMULH (vector, unpredicated)
+;
+define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a,
+                                       <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqrdmulh_i8
+; CHECK: sqrdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a,
+                                                                    <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a,
+                                        <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrdmulh_i16
+; CHECK: sqrdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a,
+                                        <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrdmulh_i32
+; CHECK: sqrdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a,
+                                        <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrdmulh_i64
+; CHECK: sqrdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
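Usage note: the MUL_ZPmZ patterns added in AArch64SVEInstrInfo.td also let a plain IR multiply of scalable vectors select the predicated MUL under an all-true predicate on base SVE (no +sve2). A minimal sketch of such a test follows; it is illustrative only and not part of this diff, and the function name and exact CHECK register assignments are assumptions:

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
define <vscale x 4 x i32> @mul_i32_vec(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: mul_i32_vec
; CHECK: ptrue p0.s
; CHECK: mul z0.s, p0/m, z0.s, z1.s
  %res = mul <vscale x 4 x i32> %a, %b    ; selected via the new unpredicated mul pattern
  ret <vscale x 4 x i32> %res
}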