diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1208,8 +1208,6 @@
 def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;
 
 def int_aarch64_sve_pmul : AdvSIMD_2VectorArg_Intrinsic;
-def int_aarch64_sve_sqdmulh : AdvSIMD_2VectorArg_Intrinsic;
-def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic;
 
 def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic;
@@ -1666,6 +1664,31 @@
 def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VectorBase_Intrinsic;
 
 //
+// SVE2 - Uniform DSP operations
+//
+
+def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqabs : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sqdmulh : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_sqdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_sqneg : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sqrdmlah : AdvSIMD_3VectorArg_Intrinsic;
+def int_aarch64_sve_sqrdmlah_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_sqrdmlsh : AdvSIMD_3VectorArg_Intrinsic;
+def int_aarch64_sve_sqrdmlsh_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_sqrdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_ursqrte : AdvSIMD_Merged1VectorArg_Intrinsic;
+
+//
 // SVE2 - Non-widening pairwise arithmetic
 //
 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1396,23 +1396,23 @@
 let Predicates = [HasSVE2] in {
   // SVE2 integer multiply-add (indexed)
-  defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla">;
-  defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls">;
+  defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", null_frag>;
+  defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", null_frag>;
 
   // SVE2 saturating multiply-add high (indexed)
-  defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah">;
-  defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh">;
+  defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah_lane>;
+  defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh_lane>;
 
   // SVE2 saturating multiply-add high (vectors, unpredicated)
-  defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah">;
-  defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh">;
+  defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah>;
+  defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh>;
 
   // SVE2 integer multiply (indexed)
-  defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul">;
+  defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul", null_frag>;
 
   // SVE2 saturating multiply high (indexed)
-  defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh">;
-  defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh">;
+  defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh", int_aarch64_sve_sqdmulh_lane>;
+  defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh", int_aarch64_sve_sqrdmulh_lane>;
 
   // SVE2 signed saturating doubling multiply high (unpredicated)
   defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh", int_aarch64_sve_sqdmulh>;
@@ -1503,14 +1503,14 @@
   defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
 
   // SVE2 integer halving add/subtract (predicated)
-  defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", null_frag>;
-  defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", null_frag>;
-  defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", null_frag>;
-  defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", null_frag>;
-  defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", null_frag>;
-  defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", null_frag>;
-  defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", null_frag>;
-  defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", null_frag>;
+  defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", int_aarch64_sve_shadd>;
+  defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", int_aarch64_sve_uhadd>;
+  defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub>;
+  defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub>;
+  defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", int_aarch64_sve_srhadd>;
+  defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", int_aarch64_sve_urhadd>;
+  defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr>;
+  defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr>;
 
   // SVE2 integer pairwise add and accumulate long
   defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>;
@@ -1524,10 +1524,10 @@
   defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>;
 
   // SVE2 integer unary operations (predicated)
-  defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">;
-  defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte">;
-  defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs">;
-  defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">;
+  defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe", int_aarch64_sve_urecpe>;
+  defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte", int_aarch64_sve_ursqrte>;
+  defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs", int_aarch64_sve_sqabs>;
+  defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg", int_aarch64_sve_sqneg>;
 
   // SVE2 saturating add/subtract
   defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", null_frag>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2340,11 +2340,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_int_mla<bit S, string asm> {
+multiclass sve2_int_mla<bit S, string asm, SDPatternOperator op> {
   def _B : sve2_int_mla<0b00, { 0b1110, S }, asm, ZPR8, ZPR8>;
   def _H : sve2_int_mla<0b01, { 0b1110, S }, asm, ZPR16, ZPR16>;
   def _S : sve2_int_mla<0b10, { 0b1110, S }, asm, ZPR32, ZPR32>;
   def _D : sve2_int_mla<0b11, { 0b1110, S }, asm, ZPR64, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve2_int_mla_long<bits<5> opc, string asm> {
@@ -2376,26 +2381,31 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm> {
-  def _H : sve2_int_mla_by_indexed_elem<{0, ?}, { 0b000, opc, S }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm,
+                                        SDPatternOperator op> {
+  def _H : sve2_int_mla_by_indexed_elem<{0, ?}, { 0b000, opc, S }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH32b> {
     bits<3> Zm;
     bits<3> iop;
     let Inst{22} = iop{2};
     let Inst{20-19} = iop{1-0};
     let Inst{18-16} = Zm;
   }
-  def _S : sve2_int_mla_by_indexed_elem<0b10, { 0b000, opc, S }, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+  def _S : sve2_int_mla_by_indexed_elem<0b10, { 0b000, opc, S }, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS32b> {
     bits<3> Zm;
     bits<2> iop;
     let Inst{20-19} = iop;
     let Inst{18-16} = Zm;
   }
-  def _D : sve2_int_mla_by_indexed_elem<0b11, { 0b000, opc, S }, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+  def _D : sve2_int_mla_by_indexed_elem<0b11, { 0b000, opc, S }, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD32b> {
    bits<4> Zm;
    bit iop;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
  }
+
+  def : SVE_4_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, i32, VectorIndexH32b, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, i32, VectorIndexS32b, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, i32, VectorIndexD32b, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2662,26 +2672,31 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm> {
-  def _H : sve2_int_mul_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm,
+                                        SDPatternOperator op> {
+  def _H : sve2_int_mul_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH32b> {
     bits<3> Zm;
     bits<3> iop;
     let Inst{22} = iop{2};
     let Inst{20-19} = iop{1-0};
     let Inst{18-16} = Zm;
   }
-  def _S : sve2_int_mul_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+  def _S : sve2_int_mul_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS32b> {
     bits<3> Zm;
     bits<2> iop;
     let Inst{20-19} = iop;
     let Inst{18-16} = Zm;
   }
-  def _D : sve2_int_mul_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+  def _D : sve2_int_mul_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD32b> {
    bits<4> Zm;
    bit iop;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
  }
+
+  def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, VectorIndexH32b, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, VectorIndexS32b, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, VectorIndexD32b, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm> {
@@ -2797,15 +2812,22 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm> {
+multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
+                                   SDPatternOperator op> {
   def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
 }
 
-multiclass sve2_int_un_pred_arit<bits<3> opc, string asm> {
+multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>;
   def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>;
   def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
   def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
//===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll @@ -0,0 +1,869 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s + +; +; SHADD +; + +define @shadd_i8( %pg, %a, %b) { +; CHECK-LABEL: shadd_i8: +; CHECK: shadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @shadd_i16( %pg, %a, %b) { +; CHECK-LABEL: shadd_i16: +; CHECK: shadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @shadd_i32( %pg, %a, %b) { +; CHECK-LABEL: shadd_i32: +; CHECK: shadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @shadd_i64( %pg, %a, %b) { +; CHECK-LABEL: shadd_i64: +; CHECK: shadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SHSUB +; + +define @shsub_i8( %pg, %a, %b) { +; CHECK-LABEL: shsub_i8: +; CHECK: shsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsub.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @shsub_i16( %pg, %a, %b) { +; CHECK-LABEL: shsub_i16: +; CHECK: shsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsub.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @shsub_i32( %pg, %a, %b) { +; CHECK-LABEL: shsub_i32: +; CHECK: shsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsub.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @shsub_i64( %pg, %a, %b) { +; CHECK-LABEL: shsub_i64: +; CHECK: shsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsub.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SHSUBR +; + +define @shsubr_i8( %pg, %a, %b) { +; CHECK-LABEL: shsubr_i8: +; CHECK: shsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsubr.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @shsubr_i16( %pg, %a, %b) { +; CHECK-LABEL: shsubr_i16: +; CHECK: shsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsubr.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @shsubr_i32( %pg, %a, %b) { +; CHECK-LABEL: shsubr_i32: +; CHECK: shsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsubr.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @shsubr_i64( %pg, %a, %b) { +; CHECK-LABEL: shsubr_i64: +; CHECK: shsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.shsubr.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SQABS +; + +define @sqabs_i8( %a, %pg, %b) { +; CHECK-LABEL: sqabs_i8: +; CHECK: sqabs z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqabs.nxv16i8( %a, + %pg, + %b) + ret %out +} + +define @sqabs_i16( %a, %pg, %b) { +; CHECK-LABEL: sqabs_i16: +; CHECK: sqabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqabs.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @sqabs_i32( %a, %pg, %b) { +; CHECK-LABEL: sqabs_i32: +; CHECK: sqabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqabs.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @sqabs_i64( %a, %pg, %b) { +; CHECK-LABEL: sqabs_i64: +; CHECK: sqabs 
z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqabs.nxv2i64( %a, + %pg, + %b) + ret %out +} + +; +; SQDMULH (Vector) +; + +define @sqdmulh_i8( %a, %b) { +; CHECK-LABEL: sqdmulh_i8: +; CHECK: sqdmulh z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmulh.nxv16i8( %a, + %b) + ret %out +} + +define @sqdmulh_i16( %a, %b) { +; CHECK-LABEL: sqdmulh_i16: +; CHECK: sqdmulh z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmulh.nxv8i16( %a, + %b) + ret %out +} + +define @sqdmulh_i32( %a, %b) { +; CHECK-LABEL: sqdmulh_i32: +; CHECK: sqdmulh z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmulh.nxv4i32( %a, + %b) + ret %out +} + +define @sqdmulh_i64( %a, %b) { +; CHECK-LABEL: sqdmulh_i64: +; CHECK: sqdmulh z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmulh.nxv2i64( %a, + %b) + ret %out +} + +; +; SQDMULH (Indexed) +; + +define @sqdmulh_lane_i16( %a, %b) { +; CHECK-LABEL: sqdmulh_lane_i16: +; CHECK: sqdmulh z0.h, z0.h, z1.h[7] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmulh.lane.nxv8i16( %a, + %b, + i32 7) + ret %out +} + +define @sqdmulh_lane_i32( %a, %b) { +; CHECK-LABEL: sqdmulh_lane_i32: +; CHECK: sqdmulh z0.s, z0.s, z1.s[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmulh.lane.nxv4i32( %a, + %b, + i32 3); + ret %out +} + +define @sqdmulh_lane_i64( %a, %b) { +; CHECK-LABEL: sqdmulh_lane_i64: +; CHECK: sqdmulh z0.d, z0.d, z1.d[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmulh.lane.nxv2i64( %a, + %b, + i32 1) + ret %out +} + +; +; SQNEG +; + +define @sqneg_i8( %a, %pg, %b) { +; CHECK-LABEL: sqneg_i8: +; CHECK: sqneg z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqneg.nxv16i8( %a, + %pg, + %b) + ret %out +} + +define @sqneg_i16( %a, %pg, %b) { +; CHECK-LABEL: sqneg_i16: +; CHECK: sqneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqneg.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @sqneg_i32( %a, %pg, %b) { +; CHECK-LABEL: sqneg_i32: +; CHECK: sqneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqneg.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @sqneg_i64( %a, %pg, %b) { +; CHECK-LABEL: sqneg_i64: +; CHECK: sqneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqneg.nxv2i64( %a, + %pg, + %b) + ret %out +} + +; +; SQRDMALH (Vectors) +; + +define @sqrdmlah_i8( %a, %b, %c) { +; CHECK-LABEL: sqrdmlah_i8: +; CHECK: sqrdmlah z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlah.nxv16i8( %a, + %b, + %c) + ret %out +} + +define @sqrdmlah_i16( %a, %b, %c) { +; CHECK-LABEL: sqrdmlah_i16: +; CHECK: sqrdmlah z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlah.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @sqrdmlah_i32( %a, %b, %c) { +; CHECK-LABEL: sqrdmlah_i32: +; CHECK: sqrdmlah z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlah.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @sqrdmlah_i64( %a, %b, %c) { +; CHECK-LABEL: sqrdmlah_i64: +; CHECK: sqrdmlah z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlah.nxv2i64( %a, + %b, + %c) + ret %out +} + +; +; SQRDMALH (Indexed) +; + +define @sqrdmlah_lane_i16( %a, %b, %c) { +; CHECK-LABEL: sqrdmlah_lane_i16: +; CHECK: sqrdmlah z0.h, z1.h, z2.h[5] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16( %a, + %b, + %c, + i32 5) + ret %out +} + +define @sqrdmlah_lane_i32( %a, %b, %c) { +; CHECK-LABEL: 
sqrdmlah_lane_i32: +; CHECK: sqrdmlah z0.s, z1.s, z2.s[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32( %a, + %b, + %c, + i32 1); + ret %out +} + +define @sqrdmlah_lane_i64( %a, %b, %c) { +; CHECK-LABEL: sqrdmlah_lane_i64: +; CHECK: sqrdmlah z0.d, z1.d, z2.d[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64( %a, + %b, + %c, + i32 1) + ret %out +} + +; +; SQRDMSLH (Vectors) +; + +define @sqrdmlsh_i8( %a, %b, %c) { +; CHECK-LABEL: sqrdmlsh_i8: +; CHECK: sqrdmlsh z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlsh.nxv16i8( %a, + %b, + %c) + ret %out +} + +define @sqrdmlsh_i16( %a, %b, %c) { +; CHECK-LABEL: sqrdmlsh_i16: +; CHECK: sqrdmlsh z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlsh.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @sqrdmlsh_i32( %a, %b, %c) { +; CHECK-LABEL: sqrdmlsh_i32: +; CHECK: sqrdmlsh z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlsh.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @sqrdmlsh_i64( %a, %b, %c) { +; CHECK-LABEL: sqrdmlsh_i64: +; CHECK: sqrdmlsh z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlsh.nxv2i64( %a, + %b, + %c) + ret %out +} + +; +; SQRDMSLH (Indexed) +; + +define @sqrdmlsh_lane_i16( %a, %b, %c) { +; CHECK-LABEL: sqrdmlsh_lane_i16: +; CHECK: sqrdmlsh z0.h, z1.h, z2.h[4] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16( %a, + %b, + %c, + i32 4) + ret %out +} + +define @sqrdmlsh_lane_i32( %a, %b, %c) { +; CHECK-LABEL: sqrdmlsh_lane_i32: +; CHECK: sqrdmlsh z0.s, z1.s, z2.s[0] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32( %a, + %b, + %c, + i32 0); + ret %out +} + +define @sqrdmlsh_lane_i64( %a, %b, %c) { +; CHECK-LABEL: sqrdmlsh_lane_i64: +; CHECK: sqrdmlsh z0.d, z1.d, z2.d[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64( %a, + %b, + %c, + i32 1) + ret %out +} + +; +; SQRDMULH (Vectors) +; + +define @sqrdmulh_i8( %a, %b) { +; CHECK-LABEL: sqrdmulh_i8: +; CHECK: sqrdmulh z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmulh.nxv16i8( %a, + %b) + ret %out +} + +define @sqrdmulh_i16( %a, %b) { +; CHECK-LABEL: sqrdmulh_i16: +; CHECK: sqrdmulh z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmulh.nxv8i16( %a, + %b) + ret %out +} + +define @sqrdmulh_i32( %a, %b) { +; CHECK-LABEL: sqrdmulh_i32: +; CHECK: sqrdmulh z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmulh.nxv4i32( %a, + %b) + ret %out +} + +define @sqrdmulh_i64( %a, %b) { +; CHECK-LABEL: sqrdmulh_i64: +; CHECK: sqrdmulh z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmulh.nxv2i64( %a, + %b) + ret %out +} + +; +; SQRDMULH (Indexed) +; + +define @sqrdmulh_lane_i16( %a, %b) { +; CHECK-LABEL: sqrdmulh_lane_i16: +; CHECK: sqrdmulh z0.h, z0.h, z1.h[6] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16( %a, + %b, + i32 6) + ret %out +} + +define @sqrdmulh_lane_i32( %a, %b) { +; CHECK-LABEL: sqrdmulh_lane_i32: +; CHECK: sqrdmulh z0.s, z0.s, z1.s[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32( %a, + %b, + i32 2); + ret %out +} + +define @sqrdmulh_lane_i64( %a, %b) { +; CHECK-LABEL: sqrdmulh_lane_i64: +; CHECK: sqrdmulh z0.d, z0.d, z1.d[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64( %a, + %b, + i32 1) + ret %out +} + +; +; SRHADD +; + +define @srhadd_i8( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i8: +; 
CHECK: srhadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srhadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @srhadd_i16( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i16: +; CHECK: srhadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srhadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @srhadd_i32( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i32: +; CHECK: srhadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srhadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @srhadd_i64( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i64: +; CHECK: srhadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srhadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UHADD +; + +define @uhadd_i8( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i8: +; CHECK: uhadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uhadd_i16( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i16: +; CHECK: uhadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uhadd_i32( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i32: +; CHECK: uhadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uhadd_i64( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i64: +; CHECK: uhadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UHSUB +; + +define @uhsub_i8( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i8: +; CHECK: uhsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uhsub_i16( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i16: +; CHECK: uhsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uhsub_i32( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i32: +; CHECK: uhsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uhsub_i64( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i64: +; CHECK: uhsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UHSUBR +; + +define @uhsubr_i8( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i8: +; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uhsubr_i16( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i16: +; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uhsubr_i32( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i32: +; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uhsubr_i64( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i64: +; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; URECPE +; + +define @urecpe_i32( %a, %pg, %b) { +; CHECK-LABEL: urecpe_i32: +; CHECK: urecpe z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urecpe.nxv4i32( %a, + %pg, + %b) + ret %out +} + +; +; URHADD +; + +define @urhadd_i8( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i8: +; CHECK: urhadd z0.b, p0/m, z0.b, z1.b +; 
CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @urhadd_i16( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i16: +; CHECK: urhadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @urhadd_i32( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i32: +; CHECK: urhadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @urhadd_i64( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i64: +; CHECK: urhadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; URSQRTE +; + +define @ursqrte_i32( %a, %pg, %b) { +; CHECK-LABEL: ursqrte_i32: +; CHECK: ursqrte z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ursqrte.nxv4i32( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.shadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.shadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.shadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.shadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.shsub.nxv16i8(, , ) +declare @llvm.aarch64.sve.shsub.nxv8i16(, , ) +declare @llvm.aarch64.sve.shsub.nxv4i32(, , ) +declare @llvm.aarch64.sve.shsub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.shsubr.nxv16i8(, , ) +declare @llvm.aarch64.sve.shsubr.nxv8i16(, , ) +declare @llvm.aarch64.sve.shsubr.nxv4i32(, , ) +declare @llvm.aarch64.sve.shsubr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqabs.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqabs.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqabs.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqabs.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqdmulh.nxv16i8(, ) +declare @llvm.aarch64.sve.sqdmulh.nxv8i16(, ) +declare @llvm.aarch64.sve.sqdmulh.nxv4i32(, ) +declare @llvm.aarch64.sve.sqdmulh.nxv2i64(, ) + +declare @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(, , i32) +declare @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.sqneg.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqneg.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqneg.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqneg.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqrdmlah.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqrdmlah.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqrdmlah.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqrdmlah.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(, , , i32) +declare @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(, , , i32) +declare @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(, , , i32) + +declare @llvm.aarch64.sve.sqrdmlsh.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqrdmlsh.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqrdmlsh.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqrdmlsh.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(, , , i32) +declare @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(, , , i32) +declare @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(, , , i32) + +declare @llvm.aarch64.sve.sqrdmulh.nxv16i8(, ) +declare @llvm.aarch64.sve.sqrdmulh.nxv8i16(, ) +declare @llvm.aarch64.sve.sqrdmulh.nxv4i32(, ) +declare @llvm.aarch64.sve.sqrdmulh.nxv2i64(, ) + +declare @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(, , i32) +declare @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.srhadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.srhadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.srhadd.nxv4i32(, , ) +declare 
@llvm.aarch64.sve.srhadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uhadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.uhadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.uhadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.uhadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uhsub.nxv16i8(, , ) +declare @llvm.aarch64.sve.uhsub.nxv8i16(, , ) +declare @llvm.aarch64.sve.uhsub.nxv4i32(, , ) +declare @llvm.aarch64.sve.uhsub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uhsubr.nxv16i8(, , ) +declare @llvm.aarch64.sve.uhsubr.nxv8i16(, , ) +declare @llvm.aarch64.sve.uhsubr.nxv4i32(, , ) +declare @llvm.aarch64.sve.uhsubr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.urecpe.nxv4i32(, , ) + +declare @llvm.aarch64.sve.urhadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.urhadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.urhadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.urhadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.ursqrte.nxv4i32(, , )