diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1675,10 +1675,13 @@
 // SVE2 - Uniform DSP operations
 //
 
+def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic;
 def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_sqabs : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_sqdmulh : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_sqdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_sqneg : AdvSIMD_Merged1VectorArg_Intrinsic;
@@ -1688,13 +1691,35 @@
 def int_aarch64_sve_sqrdmlsh_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_sqrdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_sqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqshlu : AdvSIMD_SVE_ShiftByImm_Intrinsic;
+def int_aarch64_sve_sqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sri : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_srshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_srshr : AdvSIMD_SVE_ShiftByImm_Intrinsic;
+def int_aarch64_sve_srsra : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_ssra : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_suqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uaba : AdvSIMD_3VectorArg_Intrinsic;
 def int_aarch64_sve_uhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_urshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_urshr : AdvSIMD_SVE_ShiftByImm_Intrinsic;
 def int_aarch64_sve_ursqrte : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_ursra : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_usqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_usra : AdvSIMD_2VectorArgIndexed_Intrinsic;
 
 //
 // SVE2 - Non-widening pairwise arithmetic
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -698,6 +698,37 @@
   let ParserMatchClass = Imm0_63Operand;
 }
 
+// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant
+// (ImmLeaf)
+def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) < 8);
+}]> {
+  let EncoderMethod = "getVecShiftL8OpValue";
+  let DecoderMethod = "DecodeVecShiftL8Imm";
+  let ParserMatchClass = Imm0_7Operand;
+}
+def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) < 16);
+}]> {
+  let EncoderMethod = "getVecShiftL16OpValue";
+  let DecoderMethod = "DecodeVecShiftL16Imm";
+  let ParserMatchClass = Imm0_15Operand;
+}
+def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) < 32);
+}]> {
+  let EncoderMethod = "getVecShiftL32OpValue";
+  let DecoderMethod = "DecodeVecShiftL32Imm";
+  let ParserMatchClass = Imm0_31Operand;
+}
+def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
+  return (((uint32_t)Imm) < 64);
+}]> {
+  let EncoderMethod = "getVecShiftL64OpValue";
+  let DecoderMethod = "DecodeVecShiftL64Imm";
+  let ParserMatchClass = Imm0_63Operand;
+}
+
 // Crazy immediate formats used by 32-bit and 64-bit logical immediate
 // instructions for splatting repeating bit patterns across the immediate.
 def logical_imm32_XFORM : SDNodeXForm<imm, [{
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
   // SVE2 saturating add/subtract
-  defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", null_frag>;
-  defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", null_frag>;
-  defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", null_frag>;
-  defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", null_frag>;
-  defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", null_frag>;
-  defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", null_frag>;
-  defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", null_frag>;
-  defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", null_frag>;
+  defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", int_aarch64_sve_sqadd>;
+  defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", int_aarch64_sve_uqadd>;
+  defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", int_aarch64_sve_sqsub>;
+  defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", int_aarch64_sve_uqsub>;
+  defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", int_aarch64_sve_suqadd>;
+  defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", int_aarch64_sve_usqadd>;
+  defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", int_aarch64_sve_sqsubr>;
+  defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>;
 
   // SVE2 saturating/rounding bitwise shift left (predicated)
-  defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", null_frag>;
-  defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", null_frag>;
+  defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl>;
+  defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl>;
   defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>;
   defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>;
-  defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", null_frag>;
-  defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", null_frag>;
-  defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", null_frag>;
-  defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", null_frag>;
+  defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl>;
+  defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl>;
+  defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl>;
+  defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl>;
   defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>;
   defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>;
   defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>;
   defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;
 
   // SVE2 predicated shifts
-  defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
-  defm UQSHL_ZPmI :
sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", int_aarch64_sve_srshr>; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", int_aarch64_sve_urshr>; + defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", int_aarch64_sve_sqshlu>; // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">; @@ -1595,22 +1595,22 @@ defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">; // SVE2 bitwise shift and insert - defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">; - defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">; + defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", int_aarch64_sve_sri>; + defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>; // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">; - defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">; - defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">; - defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">; + defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", int_aarch64_sve_ssra>; + defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", int_aarch64_sve_usra>; + defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>; + defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>; // SVE2 complex integer add defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">; defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">; // SVE2 integer absolute difference and accumulate - defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">; - defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">; + defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>; + defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>; // SVE2 integer absolute difference and accumulate long defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2988,7 +2988,8 @@ let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_bin_shift_imm_left { +multiclass sve2_int_bin_shift_imm_left { def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{19} = imm{3}; @@ -3000,9 +3001,15 @@ let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_3_Op_Imm_Pat(NAME # _B)>; + def : SVE_3_Op_Imm_Pat(NAME # _H)>; + def : SVE_3_Op_Imm_Pat(NAME # _S)>; + def : SVE_3_Op_Imm_Pat(NAME # _D)>; } -multiclass sve2_int_bin_shift_imm_right { +multiclass sve2_int_bin_shift_imm_right { def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; @@ -3014,6 +3021,11 @@ let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_3_Op_Imm_Pat(NAME # _B)>; + def : 
SVE_3_Op_Imm_Pat(NAME # _H)>; + def : SVE_3_Op_Imm_Pat(NAME # _S)>; + def : SVE_3_Op_Imm_Pat(NAME # _D)>; } class sve2_int_bin_accum_shift_imm tsz8_64, bits<2> opc, string asm, @@ -3039,7 +3051,8 @@ let ElementSize = ElementSizeNone; } -multiclass sve2_int_bin_accum_shift_imm_right opc, string asm> { +multiclass sve2_int_bin_accum_shift_imm_right opc, string asm, + SDPatternOperator op> { def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; @@ -3051,6 +3064,11 @@ let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_3_Op_Imm_Pat(NAME # _B)>; + def : SVE_3_Op_Imm_Pat(NAME # _H)>; + def : SVE_3_Op_Imm_Pat(NAME # _S)>; + def : SVE_3_Op_Imm_Pat(NAME # _D)>; } class sve2_int_cadd sz, bit opc, string asm, ZPRRegOp zprty> @@ -3101,11 +3119,16 @@ let ElementSize = ElementSizeNone; } -multiclass sve2_int_absdiff_accum { +multiclass sve2_int_absdiff_accum { def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>; def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>; def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>; def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve2_int_absdiff_accum_long opc, string asm> { @@ -4391,8 +4414,7 @@ // SVE Bitwise Shift - Predicated Group //===----------------------------------------------------------------------===// class sve_int_bin_pred_shift_imm tsz8_64, bits<4> opc, string asm, - ZPRRegOp zprty, Operand immtype, - ElementSizeEnum size> + ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), asm, "\t$Zdn, $Pg/m, $_Zdn, $imm", "", @@ -4412,41 +4434,53 @@ let Constraints = "$Zdn = $_Zdn"; let DestructiveInstType = DestructiveOther; - let ElementSize = size; + let ElementSize = zprty.ElementSize; } multiclass sve_int_bin_pred_shift_imm_left opc, string asm> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8, - ElementSizeB>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16, - ElementSizeH> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32, - ElementSizeS> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64, - ElementSizeD> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } } +multiclass sve2_int_bin_pred_shift_imm_left opc, string asm, + SDPatternOperator op> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { + let Inst{8} = imm{3}; + } + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + let Inst{9-8} = imm{4-3}; + } + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + let Inst{22} = imm{5}; + let Inst{9-8} = imm{4-3}; + } + + def : SVE_3_Op_Imm_Pat(NAME # _B)>; + def : SVE_3_Op_Imm_Pat(NAME # _H)>; + 
def : SVE_3_Op_Imm_Pat(NAME # _S)>; + def : SVE_3_Op_Imm_Pat(NAME # _D)>; +} + multiclass sve_int_bin_pred_shift_imm_right opc, string asm, SDPatternOperator op = null_frag> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, - ElementSizeB>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, - ElementSizeH> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32, - ElementSizeS> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64, - ElementSizeD> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll @@ -1,6 +1,50 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s ; +; SABA +; + +define @saba_i8( %a, %b, %c) { +; CHECK-LABEL: saba_i8: +; CHECK: saba z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saba.nxv16i8( %a, + %b, + %c) + ret %out +} + +define @saba_i16( %a, %b, %c) { +; CHECK-LABEL: saba_i16: +; CHECK: saba z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saba.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @saba_i32( %a, %b, %c) { +; CHECK-LABEL: saba_i32: +; CHECK: saba z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saba.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @saba_i64( %a, %b, %c) { +; CHECK-LABEL: saba_i64: +; CHECK: saba z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saba.nxv2i64( %a, + %b, + %c) + ret %out +} + +; ; SHADD ; @@ -133,6 +177,50 @@ } ; +; SLI +; + +define @sli_i8( %a, %b) { +; CHECK-LABEL: sli_i8: +; CHECK: sli z0.b, z1.b, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sli.nxv16i8( %a, + %b, + i32 0) + ret %out +} + +define @sli_i16( %a, %b) { +; CHECK-LABEL: sli_i16: +; CHECK: sli z0.h, z1.h, #1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sli.nxv8i16( %a, + %b, + i32 1) + ret %out +} + +define @sli_i32( %a, %b) { +; CHECK-LABEL: sli_i32: +; CHECK: sli z0.s, z1.s, #30 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sli.nxv4i32( %a, + %b, + i32 30); + ret %out +} + +define @sli_i64( %a, %b) { +; CHECK-LABEL: sli_i64: +; CHECK: sli z0.d, z1.d, #63 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sli.nxv2i64( %a, + %b, + i32 63) + ret %out +} + +; ; SQABS ; @@ -177,6 +265,50 @@ } ; +; SQADD +; + +define @sqadd_i8( %pg, %a, %b) { +; CHECK-LABEL: sqadd_i8: +; CHECK: sqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @sqadd_i16( %pg, %a, %b) { +; CHECK-LABEL: sqadd_i16: +; CHECK: sqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @sqadd_i32( %pg, %a, %b) { +; CHECK-LABEL: sqadd_i32: +; CHECK: sqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @sqadd_i64( 
%pg, %a, %b) { +; CHECK-LABEL: sqadd_i64: +; CHECK: sqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; ; SQDMULH (Vector) ; @@ -531,281 +663,1264 @@ } ; -; SRHADD +; SQRSHL ; -define @srhadd_i8( %pg, %a, %b) { -; CHECK-LABEL: srhadd_i8: -; CHECK: srhadd z0.b, p0/m, z0.b, z1.b +define @sqrshl_i8( %pg, %a, %b) { +; CHECK-LABEL: sqrshl_i8: +; CHECK: sqrshl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.srhadd.nxv16i8( %pg, + %out = call @llvm.aarch64.sve.sqrshl.nxv16i8( %pg, %a, %b) ret %out } -define @srhadd_i16( %pg, %a, %b) { -; CHECK-LABEL: srhadd_i16: -; CHECK: srhadd z0.h, p0/m, z0.h, z1.h +define @sqrshl_i16( %pg, %a, %b) { +; CHECK-LABEL: sqrshl_i16: +; CHECK: sqrshl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.srhadd.nxv8i16( %pg, + %out = call @llvm.aarch64.sve.sqrshl.nxv8i16( %pg, %a, %b) ret %out } -define @srhadd_i32( %pg, %a, %b) { -; CHECK-LABEL: srhadd_i32: -; CHECK: srhadd z0.s, p0/m, z0.s, z1.s +define @sqrshl_i32( %pg, %a, %b) { +; CHECK-LABEL: sqrshl_i32: +; CHECK: sqrshl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.srhadd.nxv4i32( %pg, + %out = call @llvm.aarch64.sve.sqrshl.nxv4i32( %pg, %a, %b) ret %out } -define @srhadd_i64( %pg, %a, %b) { -; CHECK-LABEL: srhadd_i64: -; CHECK: srhadd z0.d, p0/m, z0.d, z1.d +define @sqrshl_i64( %pg, %a, %b) { +; CHECK-LABEL: sqrshl_i64: +; CHECK: sqrshl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.srhadd.nxv2i64( %pg, + %out = call @llvm.aarch64.sve.sqrshl.nxv2i64( %pg, %a, %b) ret %out } ; -; UHADD +; SQSHL (Vectors) ; -define @uhadd_i8( %pg, %a, %b) { -; CHECK-LABEL: uhadd_i8: -; CHECK: uhadd z0.b, p0/m, z0.b, z1.b +define @sqshl_i8( %pg, %a, %b) { +; CHECK-LABEL: sqshl_i8: +; CHECK: sqshl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhadd.nxv16i8( %pg, + %out = call @llvm.aarch64.sve.sqshl.nxv16i8( %pg, %a, %b) ret %out } -define @uhadd_i16( %pg, %a, %b) { -; CHECK-LABEL: uhadd_i16: -; CHECK: uhadd z0.h, p0/m, z0.h, z1.h +define @sqshl_i16( %pg, %a, %b) { +; CHECK-LABEL: sqshl_i16: +; CHECK: sqshl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhadd.nxv8i16( %pg, + %out = call @llvm.aarch64.sve.sqshl.nxv8i16( %pg, %a, %b) ret %out } -define @uhadd_i32( %pg, %a, %b) { -; CHECK-LABEL: uhadd_i32: -; CHECK: uhadd z0.s, p0/m, z0.s, z1.s +define @sqshl_i32( %pg, %a, %b) { +; CHECK-LABEL: sqshl_i32: +; CHECK: sqshl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhadd.nxv4i32( %pg, + %out = call @llvm.aarch64.sve.sqshl.nxv4i32( %pg, %a, %b) ret %out } -define @uhadd_i64( %pg, %a, %b) { -; CHECK-LABEL: uhadd_i64: -; CHECK: uhadd z0.d, p0/m, z0.d, z1.d +define @sqshl_i64( %pg, %a, %b) { +; CHECK-LABEL: sqshl_i64: +; CHECK: sqshl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhadd.nxv2i64( %pg, + %out = call @llvm.aarch64.sve.sqshl.nxv2i64( %pg, %a, %b) ret %out } ; -; UHSUB +; SQSHLU ; -define @uhsub_i8( %pg, %a, %b) { -; CHECK-LABEL: uhsub_i8: -; CHECK: uhsub z0.b, p0/m, z0.b, z1.b +define @sqshlu_i8( %pg, %a) { +; CHECK-LABEL: sqshlu_i8: +; CHECK: sqshlu z0.b, p0/m, z0.b, #2 ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsub.nxv16i8( %pg, + %out = call @llvm.aarch64.sve.sqshlu.nxv16i8( %pg, + %a, + i32 2) + ret %out +} + +define @sqshlu_i16( %pg, %a) { +; CHECK-LABEL: sqshlu_i16: +; CHECK: sqshlu z0.h, p0/m, z0.h, #3 +; CHECK-NEXT: ret + %out = 
call @llvm.aarch64.sve.sqshlu.nxv8i16( %pg, + %a, + i32 3) + ret %out +} + +define @sqshlu_i32( %pg, %a) { +; CHECK-LABEL: sqshlu_i32: +; CHECK: sqshlu z0.s, p0/m, z0.s, #29 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqshlu.nxv4i32( %pg, + %a, + i32 29) + ret %out +} + +define @sqshlu_i64( %pg, %a) { +; CHECK-LABEL: sqshlu_i64: +; CHECK: sqshlu z0.d, p0/m, z0.d, #62 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqshlu.nxv2i64( %pg, + %a, + i32 62) + ret %out +} + +; +; SQSUB +; + +define @sqsub_i8( %pg, %a, %b) { +; CHECK-LABEL: sqsub_i8: +; CHECK: sqsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqsub.nxv16i8( %pg, %a, %b) ret %out } -define @uhsub_i16( %pg, %a, %b) { -; CHECK-LABEL: uhsub_i16: -; CHECK: uhsub z0.h, p0/m, z0.h, z1.h +define @sqsub_i16( %pg, %a, %b) { +; CHECK-LABEL: sqsub_i16: +; CHECK: sqsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsub.nxv8i16( %pg, + %out = call @llvm.aarch64.sve.sqsub.nxv8i16( %pg, %a, %b) ret %out } -define @uhsub_i32( %pg, %a, %b) { -; CHECK-LABEL: uhsub_i32: -; CHECK: uhsub z0.s, p0/m, z0.s, z1.s +define @sqsub_i32( %pg, %a, %b) { +; CHECK-LABEL: sqsub_i32: +; CHECK: sqsub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsub.nxv4i32( %pg, + %out = call @llvm.aarch64.sve.sqsub.nxv4i32( %pg, %a, %b) ret %out } -define @uhsub_i64( %pg, %a, %b) { -; CHECK-LABEL: uhsub_i64: -; CHECK: uhsub z0.d, p0/m, z0.d, z1.d +define @sqsub_i64( %pg, %a, %b) { +; CHECK-LABEL: sqsub_i64: +; CHECK: sqsub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsub.nxv2i64( %pg, + %out = call @llvm.aarch64.sve.sqsub.nxv2i64( %pg, %a, %b) ret %out } ; -; UHSUBR +; SQSUBR ; -define @uhsubr_i8( %pg, %a, %b) { -; CHECK-LABEL: uhsubr_i8: -; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b +define @sqsubr_i8( %pg, %a, %b) { +; CHECK-LABEL: sqsubr_i8: +; CHECK: sqsubr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsubr.nxv16i8( %pg, + %out = call @llvm.aarch64.sve.sqsubr.nxv16i8( %pg, %a, %b) ret %out } -define @uhsubr_i16( %pg, %a, %b) { -; CHECK-LABEL: uhsubr_i16: -; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h +define @sqsubr_i16( %pg, %a, %b) { +; CHECK-LABEL: sqsubr_i16: +; CHECK: sqsubr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsubr.nxv8i16( %pg, + %out = call @llvm.aarch64.sve.sqsubr.nxv8i16( %pg, %a, %b) ret %out } -define @uhsubr_i32( %pg, %a, %b) { -; CHECK-LABEL: uhsubr_i32: -; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s +define @sqsubr_i32( %pg, %a, %b) { +; CHECK-LABEL: sqsubr_i32: +; CHECK: sqsubr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsubr.nxv4i32( %pg, + %out = call @llvm.aarch64.sve.sqsubr.nxv4i32( %pg, %a, %b) ret %out } -define @uhsubr_i64( %pg, %a, %b) { -; CHECK-LABEL: uhsubr_i64: -; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d +define @sqsubr_i64( %pg, %a, %b) { +; CHECK-LABEL: sqsubr_i64: +; CHECK: sqsubr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uhsubr.nxv2i64( %pg, + %out = call @llvm.aarch64.sve.sqsubr.nxv2i64( %pg, %a, %b) ret %out } ; -; URECPE -; - -define @urecpe_i32( %a, %pg, %b) { -; CHECK-LABEL: urecpe_i32: -; CHECK: urecpe z0.s, p0/m, z1.s -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.urecpe.nxv4i32( %a, - %pg, - %b) - ret %out -} - -; -; URHADD +; SRHADD ; -define @urhadd_i8( %pg, %a, %b) { -; CHECK-LABEL: urhadd_i8: -; CHECK: urhadd z0.b, p0/m, z0.b, z1.b +define @srhadd_i8( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i8: +; CHECK: 
srhadd z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.urhadd.nxv16i8( %pg, + %out = call @llvm.aarch64.sve.srhadd.nxv16i8( %pg, %a, %b) ret %out } -define @urhadd_i16( %pg, %a, %b) { -; CHECK-LABEL: urhadd_i16: -; CHECK: urhadd z0.h, p0/m, z0.h, z1.h +define @srhadd_i16( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i16: +; CHECK: srhadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.urhadd.nxv8i16( %pg, + %out = call @llvm.aarch64.sve.srhadd.nxv8i16( %pg, %a, %b) ret %out } -define @urhadd_i32( %pg, %a, %b) { -; CHECK-LABEL: urhadd_i32: -; CHECK: urhadd z0.s, p0/m, z0.s, z1.s +define @srhadd_i32( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i32: +; CHECK: srhadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.urhadd.nxv4i32( %pg, + %out = call @llvm.aarch64.sve.srhadd.nxv4i32( %pg, %a, %b) ret %out } -define @urhadd_i64( %pg, %a, %b) { -; CHECK-LABEL: urhadd_i64: -; CHECK: urhadd z0.d, p0/m, z0.d, z1.d +define @srhadd_i64( %pg, %a, %b) { +; CHECK-LABEL: srhadd_i64: +; CHECK: srhadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.urhadd.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.srhadd.nxv2i64( %pg, + %a, + %b) ret %out } ; -; URSQRTE +; SRI ; -define @ursqrte_i32( %a, %pg, %b) { -; CHECK-LABEL: ursqrte_i32: -; CHECK: ursqrte z0.s, p0/m, z1.s +define @sri_i8( %a, %b) { +; CHECK-LABEL: sri_i8: +; CHECK: sri z0.b, z1.b, #1 ; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.ursqrte.nxv4i32( %a, - %pg, - %b) - ret %out + %out = call @llvm.aarch64.sve.sri.nxv16i8( %a, + %b, + i32 1) + ret %out } -declare @llvm.aarch64.sve.shadd.nxv16i8(, , ) -declare @llvm.aarch64.sve.shadd.nxv8i16(, , ) -declare @llvm.aarch64.sve.shadd.nxv4i32(, , ) -declare @llvm.aarch64.sve.shadd.nxv2i64(, , ) +define @sri_i16( %a, %b) { +; CHECK-LABEL: sri_i16: +; CHECK: sri z0.h, z1.h, #16 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sri.nxv8i16( %a, + %b, + i32 16) + ret %out +} -declare @llvm.aarch64.sve.shsub.nxv16i8(, , ) -declare @llvm.aarch64.sve.shsub.nxv8i16(, , ) -declare @llvm.aarch64.sve.shsub.nxv4i32(, , ) -declare @llvm.aarch64.sve.shsub.nxv2i64(, , ) +define @sri_i32( %a, %b) { +; CHECK-LABEL: sri_i32: +; CHECK: sri z0.s, z1.s, #32 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sri.nxv4i32( %a, + %b, + i32 32); + ret %out +} -declare @llvm.aarch64.sve.shsubr.nxv16i8(, , ) -declare @llvm.aarch64.sve.shsubr.nxv8i16(, , ) -declare @llvm.aarch64.sve.shsubr.nxv4i32(, , ) -declare @llvm.aarch64.sve.shsubr.nxv2i64(, , ) +define @sri_i64( %a, %b) { +; CHECK-LABEL: sri_i64: +; CHECK: sri z0.d, z1.d, #64 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sri.nxv2i64( %a, + %b, + i32 64) + ret %out +} -declare @llvm.aarch64.sve.sqabs.nxv16i8(, , ) -declare @llvm.aarch64.sve.sqabs.nxv8i16(, , ) -declare @llvm.aarch64.sve.sqabs.nxv4i32(, , ) -declare @llvm.aarch64.sve.sqabs.nxv2i64(, , ) +; +; SRSHL +; -declare @llvm.aarch64.sve.sqdmulh.nxv16i8(, ) -declare @llvm.aarch64.sve.sqdmulh.nxv8i16(, ) -declare @llvm.aarch64.sve.sqdmulh.nxv4i32(, ) -declare @llvm.aarch64.sve.sqdmulh.nxv2i64(, ) +define @srshl_i8( %pg, %a, %b) { +; CHECK-LABEL: srshl_i8: +; CHECK: srshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshl.nxv16i8( %pg, + %a, + %b) + ret %out +} -declare @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(, , i32) -declare @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(, , i32) -declare @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(, , i32) +define @srshl_i16( %pg, %a, %b) { +; CHECK-LABEL: srshl_i16: +; 
CHECK: srshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshl.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @srshl_i32( %pg, %a, %b) { +; CHECK-LABEL: srshl_i32: +; CHECK: srshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshl.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @srshl_i64( %pg, %a, %b) { +; CHECK-LABEL: srshl_i64: +; CHECK: srshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshl.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SRSHR +; + +define @srshr_i8( %pg, %a) { +; CHECK-LABEL: srshr_i8: +; CHECK: srshr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshr.nxv16i8( %pg, + %a, + i32 8) + ret %out +} + +define @srshr_i16( %pg, %a) { +; CHECK-LABEL: srshr_i16: +; CHECK: srshr z0.h, p0/m, z0.h, #1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshr.nxv8i16( %pg, + %a, + i32 1) + ret %out +} + +define @srshr_i32( %pg, %a) { +; CHECK-LABEL: srshr_i32: +; CHECK: srshr z0.s, p0/m, z0.s, #22 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshr.nxv4i32( %pg, + %a, + i32 22) + ret %out +} + +define @srshr_i64( %pg, %a) { +; CHECK-LABEL: srshr_i64: +; CHECK: srshr z0.d, p0/m, z0.d, #54 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshr.nxv2i64( %pg, + %a, + i32 54) + ret %out +} + +; +; SRSRA +; + +define @srsra_i8( %a, %b) { +; CHECK-LABEL: srsra_i8: +; CHECK: srsra z0.b, z1.b, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srsra.nxv16i8( %a, + %b, + i32 2) + ret %out +} + +define @srsra_i16( %a, %b) { +; CHECK-LABEL: srsra_i16: +; CHECK: srsra z0.h, z1.h, #15 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srsra.nxv8i16( %a, + %b, + i32 15) + ret %out +} + +define @srsra_i32( %a, %b) { +; CHECK-LABEL: srsra_i32: +; CHECK: srsra z0.s, z1.s, #12 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srsra.nxv4i32( %a, + %b, + i32 12) + ret %out +} + +define @srsra_i64( %a, %b) { +; CHECK-LABEL: srsra_i64: +; CHECK: srsra z0.d, z1.d, #44 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srsra.nxv2i64( %a, + %b, + i32 44) + ret %out +} + +; +; SSRA +; + +define @ssra_i8( %a, %b) { +; CHECK-LABEL: ssra_i8: +; CHECK: ssra z0.b, z1.b, #3 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssra.nxv16i8( %a, + %b, + i32 3) + ret %out +} + +define @ssra_i16( %a, %b) { +; CHECK-LABEL: ssra_i16: +; CHECK: ssra z0.h, z1.h, #14 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssra.nxv8i16( %a, + %b, + i32 14) + ret %out +} + +define @ssra_i32( %a, %b) { +; CHECK-LABEL: ssra_i32: +; CHECK: ssra z0.s, z1.s, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssra.nxv4i32( %a, + %b, + i32 2) + ret %out +} + +define @ssra_i64( %a, %b) { +; CHECK-LABEL: ssra_i64: +; CHECK: ssra z0.d, z1.d, #34 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssra.nxv2i64( %a, + %b, + i32 34) + ret %out +} + +; +; SUQADD +; + +define @suqadd_i8( %pg, %a, %b) { +; CHECK-LABEL: suqadd_i8: +; CHECK: suqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.suqadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @suqadd_i16( %pg, %a, %b) { +; CHECK-LABEL: suqadd_i16: +; CHECK: suqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.suqadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @suqadd_i32( %pg, %a, %b) { +; CHECK-LABEL: suqadd_i32: +; CHECK: suqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.suqadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @suqadd_i64( %pg, %a, %b) { +; 
CHECK-LABEL: suqadd_i64: +; CHECK: suqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.suqadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UABA +; + +define @uaba_i8( %a, %b, %c) { +; CHECK-LABEL: uaba_i8: +; CHECK: uaba z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaba.nxv16i8( %a, + %b, + %c) + ret %out +} + +define @uaba_i16( %a, %b, %c) { +; CHECK-LABEL: uaba_i16: +; CHECK: uaba z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaba.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @uaba_i32( %a, %b, %c) { +; CHECK-LABEL: uaba_i32: +; CHECK: uaba z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaba.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @uaba_i64( %a, %b, %c) { +; CHECK-LABEL: uaba_i64: +; CHECK: uaba z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaba.nxv2i64( %a, + %b, + %c) + ret %out +} + +; +; UHADD +; + +define @uhadd_i8( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i8: +; CHECK: uhadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uhadd_i16( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i16: +; CHECK: uhadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uhadd_i32( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i32: +; CHECK: uhadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uhadd_i64( %pg, %a, %b) { +; CHECK-LABEL: uhadd_i64: +; CHECK: uhadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UHSUB +; + +define @uhsub_i8( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i8: +; CHECK: uhsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uhsub_i16( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i16: +; CHECK: uhsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uhsub_i32( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i32: +; CHECK: uhsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uhsub_i64( %pg, %a, %b) { +; CHECK-LABEL: uhsub_i64: +; CHECK: uhsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsub.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UHSUBR +; + +define @uhsubr_i8( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i8: +; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uhsubr_i16( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i16: +; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uhsubr_i32( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i32: +; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uhsubr_i64( %pg, %a, %b) { +; CHECK-LABEL: uhsubr_i64: +; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uhsubr.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UQADD +; + +define @uqadd_i8( %pg, %a, %b) { +; CHECK-LABEL: uqadd_i8: +; CHECK: uqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.uqadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uqadd_i16( %pg, %a, %b) { +; CHECK-LABEL: uqadd_i16: +; CHECK: uqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uqadd_i32( %pg, %a, %b) { +; CHECK-LABEL: uqadd_i32: +; CHECK: uqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uqadd_i64( %pg, %a, %b) { +; CHECK-LABEL: uqadd_i64: +; CHECK: uqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UQRSHL +; + +define @uqrshl_i8( %pg, %a, %b) { +; CHECK-LABEL: uqrshl_i8: +; CHECK: uqrshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqrshl.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uqrshl_i16( %pg, %a, %b) { +; CHECK-LABEL: uqrshl_i16: +; CHECK: uqrshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqrshl.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uqrshl_i32( %pg, %a, %b) { +; CHECK-LABEL: uqrshl_i32: +; CHECK: uqrshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqrshl.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uqrshl_i64( %pg, %a, %b) { +; CHECK-LABEL: uqrshl_i64: +; CHECK: uqrshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqrshl.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UQSHL (Vectors) +; + +define @uqshl_i8( %pg, %a, %b) { +; CHECK-LABEL: uqshl_i8: +; CHECK: uqshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqshl.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uqshl_i16( %pg, %a, %b) { +; CHECK-LABEL: uqshl_i16: +; CHECK: uqshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqshl.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uqshl_i32( %pg, %a, %b) { +; CHECK-LABEL: uqshl_i32: +; CHECK: uqshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqshl.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uqshl_i64( %pg, %a, %b) { +; CHECK-LABEL: uqshl_i64: +; CHECK: uqshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqshl.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UQSUB +; + +define @uqsub_i8( %pg, %a, %b) { +; CHECK-LABEL: uqsub_i8: +; CHECK: uqsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsub.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uqsub_i16( %pg, %a, %b) { +; CHECK-LABEL: uqsub_i16: +; CHECK: uqsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsub.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uqsub_i32( %pg, %a, %b) { +; CHECK-LABEL: uqsub_i32: +; CHECK: uqsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsub.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uqsub_i64( %pg, %a, %b) { +; CHECK-LABEL: uqsub_i64: +; CHECK: uqsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsub.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UQSUBR +; + +define @uqsubr_i8( %pg, %a, %b) { +; CHECK-LABEL: uqsubr_i8: +; CHECK: uqsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsubr.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uqsubr_i16( %pg, %a, %b) { +; CHECK-LABEL: uqsubr_i16: +; CHECK: uqsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsubr.nxv8i16( %pg, + %a, + %b) + ret 
%out +} + +define @uqsubr_i32( %pg, %a, %b) { +; CHECK-LABEL: uqsubr_i32: +; CHECK: uqsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsubr.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uqsubr_i64( %pg, %a, %b) { +; CHECK-LABEL: uqsubr_i64: +; CHECK: uqsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqsubr.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; URECPE +; + +define @urecpe_i32( %a, %pg, %b) { +; CHECK-LABEL: urecpe_i32: +; CHECK: urecpe z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urecpe.nxv4i32( %a, + %pg, + %b) + ret %out +} + +; +; URHADD +; + +define @urhadd_i8( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i8: +; CHECK: urhadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @urhadd_i16( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i16: +; CHECK: urhadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @urhadd_i32( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i32: +; CHECK: urhadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @urhadd_i64( %pg, %a, %b) { +; CHECK-LABEL: urhadd_i64: +; CHECK: urhadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urhadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; URSHL +; + +define @urshl_i8( %pg, %a, %b) { +; CHECK-LABEL: urshl_i8: +; CHECK: urshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshl.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @urshl_i16( %pg, %a, %b) { +; CHECK-LABEL: urshl_i16: +; CHECK: urshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshl.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @urshl_i32( %pg, %a, %b) { +; CHECK-LABEL: urshl_i32: +; CHECK: urshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshl.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @urshl_i64( %pg, %a, %b) { +; CHECK-LABEL: urshl_i64: +; CHECK: urshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshl.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; URSHR +; + +define @urshr_i8( %pg, %a) { +; CHECK-LABEL: urshr_i8: +; CHECK: urshr z0.b, p0/m, z0.b, #4 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshr.nxv16i8( %pg, + %a, + i32 4) + ret %out +} + +define @urshr_i16( %pg, %a) { +; CHECK-LABEL: urshr_i16: +; CHECK: urshr z0.h, p0/m, z0.h, #13 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshr.nxv8i16( %pg, + %a, + i32 13) + ret %out +} + +define @urshr_i32( %pg, %a) { +; CHECK-LABEL: urshr_i32: +; CHECK: urshr z0.s, p0/m, z0.s, #1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshr.nxv4i32( %pg, + %a, + i32 1) + ret %out +} + +define @urshr_i64( %pg, %a) { +; CHECK-LABEL: urshr_i64: +; CHECK: urshr z0.d, p0/m, z0.d, #24 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshr.nxv2i64( %pg, + %a, + i32 24) + ret %out +} + +; +; URSQRTE +; + +define @ursqrte_i32( %a, %pg, %b) { +; CHECK-LABEL: ursqrte_i32: +; CHECK: ursqrte z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ursqrte.nxv4i32( %a, + %pg, + %b) + ret %out +} + +; +; URSRA +; + +define @ursra_i8( %a, %b) { +; CHECK-LABEL: ursra_i8: +; CHECK: ursra z0.b, z1.b, #5 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ursra.nxv16i8( %a, + %b, + i32 5) + ret %out +} + +define @ursra_i16( %a, %b) { +; 
CHECK-LABEL: ursra_i16: +; CHECK: ursra z0.h, z1.h, #12 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ursra.nxv8i16( %a, + %b, + i32 12) + ret %out +} + +define @ursra_i32( %a, %b) { +; CHECK-LABEL: ursra_i32: +; CHECK: ursra z0.s, z1.s, #31 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ursra.nxv4i32( %a, + %b, + i32 31) + ret %out +} + +define @ursra_i64( %a, %b) { +; CHECK-LABEL: ursra_i64: +; CHECK: ursra z0.d, z1.d, #14 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ursra.nxv2i64( %a, + %b, + i32 14) + ret %out +} + +; +; USQADD +; + +define @usqadd_i8( %pg, %a, %b) { +; CHECK-LABEL: usqadd_i8: +; CHECK: usqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usqadd.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @usqadd_i16( %pg, %a, %b) { +; CHECK-LABEL: usqadd_i16: +; CHECK: usqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usqadd.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @usqadd_i32( %pg, %a, %b) { +; CHECK-LABEL: usqadd_i32: +; CHECK: usqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usqadd.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @usqadd_i64( %pg, %a, %b) { +; CHECK-LABEL: usqadd_i64: +; CHECK: usqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usqadd.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; USRA +; + +define @usra_i8( %a, %b) { +; CHECK-LABEL: usra_i8: +; CHECK: usra z0.b, z1.b, #6 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usra.nxv16i8( %a, + %b, + i32 6) + ret %out +} + +define @usra_i16( %a, %b) { +; CHECK-LABEL: usra_i16: +; CHECK: usra z0.h, z1.h, #11 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usra.nxv8i16( %a, + %b, + i32 11) + ret %out +} + +define @usra_i32( %a, %b) { +; CHECK-LABEL: usra_i32: +; CHECK: usra z0.s, z1.s, #21 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usra.nxv4i32( %a, + %b, + i32 21) + ret %out +} + +define @usra_i64( %a, %b) { +; CHECK-LABEL: usra_i64: +; CHECK: usra z0.d, z1.d, #4 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usra.nxv2i64( %a, + %b, + i32 4) + ret %out +} + +declare @llvm.aarch64.sve.saba.nxv16i8(, , ) +declare @llvm.aarch64.sve.saba.nxv8i16(, , ) +declare @llvm.aarch64.sve.saba.nxv4i32(, , ) +declare @llvm.aarch64.sve.saba.nxv2i64(, , ) + +declare @llvm.aarch64.sve.shadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.shadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.shadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.shadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.shsub.nxv16i8(, , ) +declare @llvm.aarch64.sve.shsub.nxv8i16(, , ) +declare @llvm.aarch64.sve.shsub.nxv4i32(, , ) +declare @llvm.aarch64.sve.shsub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.shsubr.nxv16i8(, , ) +declare @llvm.aarch64.sve.shsubr.nxv8i16(, , ) +declare @llvm.aarch64.sve.shsubr.nxv4i32(, , ) +declare @llvm.aarch64.sve.shsubr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sli.nxv16i8(, , i32) +declare @llvm.aarch64.sve.sli.nxv8i16(, , i32) +declare @llvm.aarch64.sve.sli.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sli.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.sqabs.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqabs.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqabs.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqabs.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqdmulh.nxv16i8(, ) +declare 
@llvm.aarch64.sve.sqdmulh.nxv8i16(, ) +declare @llvm.aarch64.sve.sqdmulh.nxv4i32(, ) +declare @llvm.aarch64.sve.sqdmulh.nxv2i64(, ) + +declare @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(, , i32) +declare @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(, , i32) declare @llvm.aarch64.sve.sqneg.nxv16i8(, , ) declare @llvm.aarch64.sve.sqneg.nxv8i16(, , ) @@ -839,11 +1954,71 @@ declare @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(, , i32) declare @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(, , i32) +declare @llvm.aarch64.sve.sqrshl.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqrshl.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqrshl.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqrshl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqshl.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqshl.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqshl.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqshl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqshlu.nxv16i8(, , i32) +declare @llvm.aarch64.sve.sqshlu.nxv8i16(, , i32) +declare @llvm.aarch64.sve.sqshlu.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sqshlu.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.sqsub.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqsub.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqsub.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqsub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sqsubr.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqsubr.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqsubr.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqsubr.nxv2i64(, , ) + declare @llvm.aarch64.sve.srhadd.nxv16i8(, , ) declare @llvm.aarch64.sve.srhadd.nxv8i16(, , ) declare @llvm.aarch64.sve.srhadd.nxv4i32(, , ) declare @llvm.aarch64.sve.srhadd.nxv2i64(, , ) +declare @llvm.aarch64.sve.sri.nxv16i8(, , i32) +declare @llvm.aarch64.sve.sri.nxv8i16(, , i32) +declare @llvm.aarch64.sve.sri.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sri.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.srshl.nxv16i8(, , ) +declare @llvm.aarch64.sve.srshl.nxv8i16(, , ) +declare @llvm.aarch64.sve.srshl.nxv4i32(, , ) +declare @llvm.aarch64.sve.srshl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.srshr.nxv16i8(, , i32) +declare @llvm.aarch64.sve.srshr.nxv8i16(, , i32) +declare @llvm.aarch64.sve.srshr.nxv4i32(, , i32) +declare @llvm.aarch64.sve.srshr.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.srsra.nxv16i8(, , i32) +declare @llvm.aarch64.sve.srsra.nxv8i16(, , i32) +declare @llvm.aarch64.sve.srsra.nxv4i32(, , i32) +declare @llvm.aarch64.sve.srsra.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.ssra.nxv16i8(, , i32) +declare @llvm.aarch64.sve.ssra.nxv8i16(, , i32) +declare @llvm.aarch64.sve.ssra.nxv4i32(, , i32) +declare @llvm.aarch64.sve.ssra.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.suqadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.suqadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.suqadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.suqadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uaba.nxv16i8(, , ) +declare @llvm.aarch64.sve.uaba.nxv8i16(, , ) +declare @llvm.aarch64.sve.uaba.nxv4i32(, , ) +declare @llvm.aarch64.sve.uaba.nxv2i64(, , ) + declare @llvm.aarch64.sve.uhadd.nxv16i8(, , ) declare @llvm.aarch64.sve.uhadd.nxv8i16(, , ) declare @llvm.aarch64.sve.uhadd.nxv4i32(, , ) @@ -859,6 +2034,31 @@ declare @llvm.aarch64.sve.uhsubr.nxv4i32(, , ) declare @llvm.aarch64.sve.uhsubr.nxv2i64(, , ) +declare @llvm.aarch64.sve.uqadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.uqadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.uqadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.uqadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uqrshl.nxv16i8(, , ) 
+declare @llvm.aarch64.sve.uqrshl.nxv8i16(, , ) +declare @llvm.aarch64.sve.uqrshl.nxv4i32(, , ) +declare @llvm.aarch64.sve.uqrshl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uqshl.nxv16i8(, , ) +declare @llvm.aarch64.sve.uqshl.nxv8i16(, , ) +declare @llvm.aarch64.sve.uqshl.nxv4i32(, , ) +declare @llvm.aarch64.sve.uqshl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uqsub.nxv16i8(, , ) +declare @llvm.aarch64.sve.uqsub.nxv8i16(, , ) +declare @llvm.aarch64.sve.uqsub.nxv4i32(, , ) +declare @llvm.aarch64.sve.uqsub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uqsubr.nxv16i8(, , ) +declare @llvm.aarch64.sve.uqsubr.nxv8i16(, , ) +declare @llvm.aarch64.sve.uqsubr.nxv4i32(, , ) +declare @llvm.aarch64.sve.uqsubr.nxv2i64(, , ) + declare @llvm.aarch64.sve.urecpe.nxv4i32(, , ) declare @llvm.aarch64.sve.urhadd.nxv16i8(, , ) @@ -866,4 +2066,29 @@ declare @llvm.aarch64.sve.urhadd.nxv4i32(, , ) declare @llvm.aarch64.sve.urhadd.nxv2i64(, , ) +declare @llvm.aarch64.sve.urshl.nxv16i8(, , ) +declare @llvm.aarch64.sve.urshl.nxv8i16(, , ) +declare @llvm.aarch64.sve.urshl.nxv4i32(, , ) +declare @llvm.aarch64.sve.urshl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.urshr.nxv16i8(, , i32) +declare @llvm.aarch64.sve.urshr.nxv8i16(, , i32) +declare @llvm.aarch64.sve.urshr.nxv4i32(, , i32) +declare @llvm.aarch64.sve.urshr.nxv2i64(, , i32) + declare @llvm.aarch64.sve.ursqrte.nxv4i32(, , ) + +declare @llvm.aarch64.sve.ursra.nxv16i8(, , i32) +declare @llvm.aarch64.sve.ursra.nxv8i16(, , i32) +declare @llvm.aarch64.sve.ursra.nxv4i32(, , i32) +declare @llvm.aarch64.sve.ursra.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.usqadd.nxv16i8(, , ) +declare @llvm.aarch64.sve.usqadd.nxv8i16(, , ) +declare @llvm.aarch64.sve.usqadd.nxv4i32(, , ) +declare @llvm.aarch64.sve.usqadd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.usra.nxv16i8(, , i32) +declare @llvm.aarch64.sve.usra.nxv8i16(, , i32) +declare @llvm.aarch64.sve.usra.nxv4i32(, , i32) +declare @llvm.aarch64.sve.usra.nxv2i64(, , i32)
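As a usage reference, below is a minimal standalone sketch (illustrative, not part of the patch) showing how two of the new intrinsics are exercised once the patterns above are in place. It mirrors the sqadd_i32 and srshr_i32 tests from the test file, with the scalable-vector types written out in full; the function names are invented for illustration.

; Predicated saturating add: selected via the SQADD_ZPmZ pattern wired up above.
define <vscale x 4 x i32> @sqadd_example(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_example:
; CHECK: sqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; Predicated rounding shift right: the i32 shift amount must be a constant, and is
; matched as a TargetConstant by the SRSHR_ZPmI pattern hooked up above.
define <vscale x 4 x i32> @srshr_example(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: srshr_example:
; CHECK: srshr z0.s, p0/m, z0.s, #22
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, i32 22)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)

Both functions are expected to compile to the single instruction in their CHECK line under the same RUN line as the test file above.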