diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1032,6 +1032,13 @@ LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class SVE2_2VectorArg_Pred_Long_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMSubdivide2VectorType<0>], + [IntrNoMem]>; + class SVE2_3VectorArg_Long_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1662,11 +1669,23 @@ // SVE2 - Non-widening pairwise arithmetic // +def int_aarch64_sve_addp : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_smaxp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sminp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_umaxp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uminp : AdvSIMD_Pred2VectorArg_Intrinsic; + +// +// SVE2 - Widening pairwise arithmetic +// + +def int_aarch64_sve_sadalp : SVE2_2VectorArg_Pred_Long_Intrinsic; +def int_aarch64_sve_uadalp : SVE2_2VectorArg_Pred_Long_Intrinsic; // // SVE2 - Floating-point widening multiply-accumulate diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1503,25 +1503,25 @@ defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">; // SVE2 integer halving add/subtract (predicated) - defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd">; - defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd">; - defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub">; - defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub">; - defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd">; - defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd">; - defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr">; - defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr">; + defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", null_frag>; + defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", null_frag>; + defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", null_frag>; + defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", null_frag>; + defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", null_frag>; + defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", null_frag>; + defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", null_frag>; + defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", null_frag>; // SVE2 integer pairwise add and accumulate long - defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp">; - defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp">; + defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>; + defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp", int_aarch64_sve_uadalp>; // SVE2 integer pairwise arithmetic - defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp">; - defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp">; - defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp">; - defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp">; - defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp">; + defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp", int_aarch64_sve_addp>; + defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp", int_aarch64_sve_smaxp>; + defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp", int_aarch64_sve_umaxp>; + defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp", int_aarch64_sve_sminp>; + defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>; // SVE2 integer unary operations (predicated) defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">; @@ -1530,28 +1530,28 @@ defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">; // SVE2 saturating add/subtract - defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd">; - defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd">; - defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub">; - defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub">; - defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">; - defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">; - defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">; - defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">; + defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", null_frag>; + defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", null_frag>; + defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", null_frag>; + defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", null_frag>; + defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", null_frag>; + defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", null_frag>; + defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", null_frag>; + defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", null_frag>; // SVE2 saturating/rounding bitwise shift left (predicated) - defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl">; - defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl">; - defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr">; - defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr">; - defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl">; - defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl">; - defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl">; - defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl">; - defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr">; - defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr">; - defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">; - defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">; + defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", null_frag>; + defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", null_frag>; + defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>; + defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>; + defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", null_frag>; + defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", null_frag>; + defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", null_frag>; + defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", null_frag>; + defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>; + defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>; + defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>; + defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>; // SVE2 predicated shifts defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2716,7 +2716,7 @@ bits<5> Zdn; let Inst{31-24} = 0b01000100; let Inst{23-22} = sz; - let Inst{21} = 0b0; + let Inst{21-20} = 0b01; let Inst{20-16} = opc{5-1}; let Inst{15-14} = 0b10; let Inst{13} = opc{0}; @@ -2729,11 +2729,16 @@ let ElementSize = zprty.ElementSize; } -multiclass sve2_int_arith_pred opc, string asm> { +multiclass sve2_int_arith_pred opc, string asm, SDPatternOperator op> { def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>; def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>; def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>; def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } class sve2_int_sadd_long_accum_pairwise sz, bit U, string asm, @@ -2757,10 +2762,14 @@ let ElementSize = zprty1.ElementSize; } -multiclass sve2_int_sadd_long_accum_pairwise { +multiclass sve2_int_sadd_long_accum_pairwise { def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>; def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>; def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } class sve2_int_un_pred_arit sz, bit Q, bits<2> opc, diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll @@ -1,6 +1,50 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s ; +; ADDP +; + +define @addp_i8( %pg, %a, %b) { +; CHECK-LABEL: addp_i8: +; CHECK: addp z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.addp.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @addp_i16( %pg, %a, %b) { +; CHECK-LABEL: addp_i16: +; CHECK: addp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.addp.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @addp_i32( %pg, %a, %b) { +; CHECK-LABEL: addp_i32: +; CHECK: addp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.addp.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @addp_i64( %pg, %a, %b) { +; CHECK-LABEL: addp_i64: +; CHECK: addp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.addp.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; ; FADDP ; @@ -170,6 +214,187 @@ ret %out } +; +; SMAXP +; + +define @smaxp_i8( %pg, %a, %b) { +; CHECK-LABEL: smaxp_i8: +; CHECK: smaxp z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smaxp.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @smaxp_i16( %pg, %a, %b) { +; CHECK-LABEL: smaxp_i16: +; CHECK: smaxp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smaxp.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @smaxp_i32( %pg, %a, %b) { +; CHECK-LABEL: smaxp_i32: +; CHECK: smaxp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smaxp.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @smaxp_i64( %pg, %a, %b) { +; CHECK-LABEL: smaxp_i64: +; CHECK: smaxp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smaxp.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; SMINP +; + +define @sminp_i8( %pg, %a, %b) { +; CHECK-LABEL: sminp_i8: +; CHECK: sminp z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sminp.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @sminp_i16( %pg, %a, %b) { +; CHECK-LABEL: sminp_i16: +; CHECK: sminp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sminp.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @sminp_i32( %pg, %a, %b) { +; CHECK-LABEL: sminp_i32: +; CHECK: sminp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sminp.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @sminp_i64( %pg, %a, %b) { +; CHECK-LABEL: sminp_i64: +; CHECK: sminp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sminp.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UMINP +; + +define @uminp_i8( %pg, %a, %b) { +; CHECK-LABEL: uminp_i8: +; CHECK: uminp z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uminp.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @uminp_i16( %pg, %a, %b) { +; CHECK-LABEL: uminp_i16: +; CHECK: uminp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uminp.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uminp_i32( %pg, %a, %b) { +; CHECK-LABEL: uminp_i32: +; CHECK: uminp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uminp.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uminp_i64( %pg, %a, %b) { +; CHECK-LABEL: uminp_i64: +; CHECK: uminp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uminp.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UMAXP +; + +define @umaxp_i8( %pg, %a, %b) { +; CHECK-LABEL: umaxp_i8: +; CHECK: umaxp z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umaxp.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @umaxp_i16( %pg, %a, %b) { +; CHECK-LABEL: umaxp_i16: +; CHECK: umaxp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umaxp.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @umaxp_i32( %pg, %a, %b) { +; CHECK-LABEL: umaxp_i32: +; CHECK: umaxp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umaxp.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @umaxp_i64( %pg, %a, %b) { +; CHECK-LABEL: umaxp_i64: +; CHECK: umaxp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umaxp.nxv2i64( %pg, + %a, + %b) + ret %out +} + +declare @llvm.aarch64.sve.addp.nxv16i8(, , ) +declare @llvm.aarch64.sve.addp.nxv8i16(, , ) +declare @llvm.aarch64.sve.addp.nxv4i32(, , ) +declare @llvm.aarch64.sve.addp.nxv2i64(, , ) + declare @llvm.aarch64.sve.faddp.nxv8f16(, , ) declare @llvm.aarch64.sve.faddp.nxv4f32(, , ) declare @llvm.aarch64.sve.faddp.nxv2f64(, , ) @@ -189,3 +414,23 @@ declare @llvm.aarch64.sve.fminnmp.nxv8f16(, , ) declare @llvm.aarch64.sve.fminnmp.nxv4f32(, , ) declare @llvm.aarch64.sve.fminnmp.nxv2f64(, , ) + +declare @llvm.aarch64.sve.smaxp.nxv16i8(, , ) +declare @llvm.aarch64.sve.smaxp.nxv8i16(, , ) +declare @llvm.aarch64.sve.smaxp.nxv4i32(, , ) +declare @llvm.aarch64.sve.smaxp.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sminp.nxv16i8(, , ) +declare @llvm.aarch64.sve.sminp.nxv8i16(, , ) +declare @llvm.aarch64.sve.sminp.nxv4i32(, , ) +declare @llvm.aarch64.sve.sminp.nxv2i64(, , ) + +declare @llvm.aarch64.sve.umaxp.nxv16i8(, , ) +declare @llvm.aarch64.sve.umaxp.nxv8i16(, , ) +declare @llvm.aarch64.sve.umaxp.nxv4i32(, , ) +declare @llvm.aarch64.sve.umaxp.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uminp.nxv16i8(, , ) +declare @llvm.aarch64.sve.uminp.nxv8i16(, , ) +declare @llvm.aarch64.sve.uminp.nxv4i32(, , ) +declare @llvm.aarch64.sve.uminp.nxv2i64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll @@ -0,0 +1,77 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s + +; +; SADALP +; + +define @sadalp_i8( %pg, %a, %b) { +; CHECK-LABEL: sadalp_i8: +; CHECK: sadalp z0.h, p0/m, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sadalp.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @sadalp_i16( %pg, %a, %b) { +; CHECK-LABEL: sadalp_i16: +; CHECK: sadalp z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sadalp.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @sadalp_i32( %pg, %a, %b) { +; CHECK-LABEL: sadalp_i32: +; CHECK: sadalp z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sadalp.nxv2i64( %pg, + %a, + %b) + ret %out +} + +; +; UADALP +; + +define @uadalp_i8( %pg, %a, %b) { +; CHECK-LABEL: uadalp_i8: +; CHECK: uadalp z0.h, p0/m, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uadalp.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @uadalp_i16( %pg, %a, %b) { +; CHECK-LABEL: uadalp_i16: +; CHECK: uadalp z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uadalp.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @uadalp_i32( %pg, %a, %b) { +; CHECK-LABEL: uadalp_i32: +; CHECK: uadalp z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uadalp.nxv2i64( %pg, + %a, + %b) + ret %out +} + +declare @llvm.aarch64.sve.sadalp.nxv8i16(, , ) +declare @llvm.aarch64.sve.sadalp.nxv4i32(, , ) +declare @llvm.aarch64.sve.sadalp.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uadalp.nxv8i16(, , ) +declare @llvm.aarch64.sve.uadalp.nxv4i32(, , ) +declare @llvm.aarch64.sve.uadalp.nxv2i64(, , )