diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1040,6 +1040,12 @@
                  LLVMVectorOfBitcastsToInt<0>],
                 [IntrNoMem]>;
 
+  class SVE2_2VectorArg_Long_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMSubdivide2VectorType<0>,
+                 LLVMSubdivide2VectorType<0>],
+                [IntrNoMem]>;
+
   class SVE2_2VectorArg_Pred_Long_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1722,6 +1728,33 @@
 def int_aarch64_sve_usra : AdvSIMD_2VectorArgIndexed_Intrinsic;
 
 //
+// SVE2 - Widening DSP operations
+//
+
+def int_aarch64_sve_sabalb : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabalt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabdlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabdlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_smullb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_smullt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sqdmullb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sqdmullt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabalb : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabalt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabdlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabdlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_umullb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_umullt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usublb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usublt : SVE2_2VectorArg_Long_Intrinsic;
+
+//
 // SVE2 - Non-widening pairwise arithmetic
 //
 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1561,18 +1561,18 @@
   defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", int_aarch64_sve_sqshlu>;
 
   // SVE2 integer add/subtract long
-  defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
-  defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
-  defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb">;
-  defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt">;
-  defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb">;
-  defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt">;
-  defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb">;
-  defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt">;
-  defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb">;
-  defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt">;
-  defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb">;
-  defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt">;
+  defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
+  defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>;
+  defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb", int_aarch64_sve_uaddlb>;
+  defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt", int_aarch64_sve_uaddlt>;
+  defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb", int_aarch64_sve_ssublb>;
+  defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt", int_aarch64_sve_ssublt>;
+  defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb", int_aarch64_sve_usublb>;
+  defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt", int_aarch64_sve_usublt>;
+  defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb", int_aarch64_sve_sabdlb>;
+  defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt", int_aarch64_sve_sabdlt>;
+  defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb", int_aarch64_sve_uabdlb>;
+  defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>;
 
   // SVE2 integer add/subtract wide
   defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">;
@@ -1585,12 +1585,12 @@
   defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">;
 
   // SVE2 integer multiply long
-  defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
-  defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
-  defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb">;
-  defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt">;
-  defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb">;
-  defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt">;
+  defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb", int_aarch64_sve_sqdmullb>;
+  defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt", int_aarch64_sve_sqdmullt>;
+  defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb", int_aarch64_sve_smullb>;
+  defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt", int_aarch64_sve_smullt>;
+  defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb", int_aarch64_sve_umullb>;
+  defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt", int_aarch64_sve_umullt>;
   defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb">;
   defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
@@ -1613,10 +1613,10 @@
   defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>;
 
   // SVE2 integer absolute difference and accumulate long
-  defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
-  defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt">;
-  defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb">;
-  defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt">;
+  defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb", int_aarch64_sve_sabalb>;
+  defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt", int_aarch64_sve_sabalt>;
+  defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb", int_aarch64_sve_uabalb>;
+  defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt", int_aarch64_sve_uabalt>;
 
   // SVE2 integer add/subtract long with carry
   defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2855,10 +2855,15 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve2_wide_int_arith_long<bits<5> opc, string asm> {
+multiclass sve2_wide_int_arith_long<bits<5> opc, string asm,
+                                    SDPatternOperator op> {
   def _H : sve2_wide_int_arith<0b01, opc, asm, ZPR16, ZPR8, ZPR8>;
   def _S : sve2_wide_int_arith<0b10, opc, asm, ZPR32, ZPR16, ZPR16>;
   def _D : sve2_wide_int_arith<0b11, opc, asm, ZPR64, ZPR32, ZPR32>;
+
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # 
_S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } multiclass sve2_wide_int_arith_wide opc, string asm> { @@ -3135,10 +3140,15 @@ def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve2_int_absdiff_accum_long opc, string asm> { +multiclass sve2_int_absdiff_accum_long opc, string asm, + SDPatternOperator op> { def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve2_int_addsub_long_carry opc, string asm> { diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll @@ -0,0 +1,783 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s + +; +; SABALB +; + +define @sabalb_b( %a, %b, %c) { +; CHECK-LABEL: sabalb_b: +; CHECK: sabalb z0.h, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabalb.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @sabalb_h( %a, %b, %c) { +; CHECK-LABEL: sabalb_h: +; CHECK: sabalb z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabalb.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @sabalb_s( %a, %b, %c) { +; CHECK-LABEL: sabalb_s: +; CHECK: sabalb z0.d, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabalb.nxv2i64( %a, + %b, + %c) + ret %out +} + +; +; SABALT +; + +define @sabalt_b( %a, %b, %c) { +; CHECK-LABEL: sabalt_b: +; CHECK: sabalt z0.h, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabalt.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @sabalt_h( %a, %b, %c) { +; CHECK-LABEL: sabalt_h: +; CHECK: sabalt z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabalt.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @sabalt_s( %a, %b, %c) { +; CHECK-LABEL: sabalt_s: +; CHECK: sabalt z0.d, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabalt.nxv2i64( %a, + %b, + %c) + ret %out +} + +; +; SABDLB +; + +define @sabdlb_b( %a, %b) { +; CHECK-LABEL: sabdlb_b: +; CHECK: sabdlb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabdlb.nxv8i16( %a, + %b) + ret %out +} + +define @sabdlb_h( %a, %b) { +; CHECK-LABEL: sabdlb_h: +; CHECK: sabdlb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabdlb.nxv4i32( %a, + %b) + ret %out +} + +define @sabdlb_s( %a, %b) { +; CHECK-LABEL: sabdlb_s: +; CHECK: sabdlb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabdlb.nxv2i64( %a, + %b) + ret %out +} + +; +; SABDLT +; + +define @sabdlt_b( %a, %b) { +; CHECK-LABEL: sabdlt_b: +; CHECK: sabdlt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabdlt.nxv8i16( %a, + %b) + ret %out +} + +define @sabdlt_h( %a, %b) { +; CHECK-LABEL: sabdlt_h: +; CHECK: sabdlt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabdlt.nxv4i32( %a, + %b) + ret %out +} + +define @sabdlt_s( %a, %b) { +; CHECK-LABEL: sabdlt_s: +; CHECK: sabdlt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sabdlt.nxv2i64( %a, + %b) + ret %out +} + +; +; SADDLB +; + +define @saddlb_b( %a, %b) { +; CHECK-LABEL: saddlb_b: +; CHECK: saddlb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddlb.nxv8i16( %a, + %b) + ret %out +} + +define @saddlb_h( %a, %b) { +; 
CHECK-LABEL: saddlb_h: +; CHECK: saddlb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddlb.nxv4i32( %a, + %b) + ret %out +} + +define @saddlb_s( %a, %b) { +; CHECK-LABEL: saddlb_s: +; CHECK: saddlb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddlb.nxv2i64( %a, + %b) + ret %out +} + +; +; SADDLT +; + +define @saddlt_b( %a, %b) { +; CHECK-LABEL: saddlt_b: +; CHECK: saddlt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddlt.nxv8i16( %a, + %b) + ret %out +} + +define @saddlt_h( %a, %b) { +; CHECK-LABEL: saddlt_h: +; CHECK: saddlt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddlt.nxv4i32( %a, + %b) + ret %out +} + +define @saddlt_s( %a, %b) { +; CHECK-LABEL: saddlt_s: +; CHECK: saddlt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.saddlt.nxv2i64( %a, + %b) + ret %out +} + +; +; SMULLB (Vectors) +; + +define @smullb_b( %a, %b) { +; CHECK-LABEL: smullb_b: +; CHECK: smullb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullb.nxv8i16( %a, + %b) + ret %out +} + +define @smullb_h( %a, %b) { +; CHECK-LABEL: smullb_h: +; CHECK: smullb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullb.nxv4i32( %a, + %b) + ret %out +} + +define @smullb_s( %a, %b) { +; CHECK-LABEL: smullb_s: +; CHECK: smullb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullb.nxv2i64( %a, + %b) + ret %out +} + +; +; SMULLT (Vectors) +; + +define @smullt_b( %a, %b) { +; CHECK-LABEL: smullt_b: +; CHECK: smullt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullt.nxv8i16( %a, + %b) + ret %out +} + +define @smullt_h( %a, %b) { +; CHECK-LABEL: smullt_h: +; CHECK: smullt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullt.nxv4i32( %a, + %b) + ret %out +} + +define @smullt_s( %a, %b) { +; CHECK-LABEL: smullt_s: +; CHECK: smullt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.smullt.nxv2i64( %a, + %b) + ret %out +} + +; +; SQDMULLB (Vectors) +; + +define @sqdmullb_b( %a, %b) { +; CHECK-LABEL: sqdmullb_b: +; CHECK: sqdmullb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullb.nxv8i16( %a, + %b) + ret %out +} + +define @sqdmullb_h( %a, %b) { +; CHECK-LABEL: sqdmullb_h: +; CHECK: sqdmullb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullb.nxv4i32( %a, + %b) + ret %out +} + +define @sqdmullb_s( %a, %b) { +; CHECK-LABEL: sqdmullb_s: +; CHECK: sqdmullb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullb.nxv2i64( %a, + %b) + ret %out +} + +; +; SQDMULLT (Vectors) +; + +define @sqdmullt_b( %a, %b) { +; CHECK-LABEL: sqdmullt_b: +; CHECK: sqdmullt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullt.nxv8i16( %a, + %b) + ret %out +} + +define @sqdmullt_h( %a, %b) { +; CHECK-LABEL: sqdmullt_h: +; CHECK: sqdmullt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullt.nxv4i32( %a, + %b) + ret %out +} + +define @sqdmullt_s( %a, %b) { +; CHECK-LABEL: sqdmullt_s: +; CHECK: sqdmullt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqdmullt.nxv2i64( %a, + %b) + ret %out +} + +; +; SSUBLB +; + +define @ssublb_b( %a, %b) { +; CHECK-LABEL: ssublb_b: +; CHECK: ssublb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssublb.nxv8i16( %a, + %b) + ret %out +} + +define @ssublb_h( %a, %b) { +; CHECK-LABEL: ssublb_h: +; CHECK: ssublb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + 
%out = call @llvm.aarch64.sve.ssublb.nxv4i32( %a, + %b) + ret %out +} + +define @ssublb_s( %a, %b) { +; CHECK-LABEL: ssublb_s: +; CHECK: ssublb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssublb.nxv2i64( %a, + %b) + ret %out +} + +; +; SSUBLT +; + +define @ssublt_b( %a, %b) { +; CHECK-LABEL: ssublt_b: +; CHECK: ssublt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssublt.nxv8i16( %a, + %b) + ret %out +} + +define @ssublt_h( %a, %b) { +; CHECK-LABEL: ssublt_h: +; CHECK: ssublt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssublt.nxv4i32( %a, + %b) + ret %out +} + +define @ssublt_s( %a, %b) { +; CHECK-LABEL: ssublt_s: +; CHECK: ssublt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ssublt.nxv2i64( %a, + %b) + ret %out +} + +; +; UABALB +; + +define @uabalb_b( %a, %b, %c) { +; CHECK-LABEL: uabalb_b: +; CHECK: uabalb z0.h, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabalb.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @uabalb_h( %a, %b, %c) { +; CHECK-LABEL: uabalb_h: +; CHECK: uabalb z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabalb.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @uabalb_s( %a, %b, %c) { +; CHECK-LABEL: uabalb_s: +; CHECK: uabalb z0.d, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabalb.nxv2i64( %a, + %b, + %c) + ret %out +} + +; +; UABALT +; + +define @uabalt_b( %a, %b, %c) { +; CHECK-LABEL: uabalt_b: +; CHECK: uabalt z0.h, z1.b, z2.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabalt.nxv8i16( %a, + %b, + %c) + ret %out +} + +define @uabalt_h( %a, %b, %c) { +; CHECK-LABEL: uabalt_h: +; CHECK: uabalt z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabalt.nxv4i32( %a, + %b, + %c) + ret %out +} + +define @uabalt_s( %a, %b, %c) { +; CHECK-LABEL: uabalt_s: +; CHECK: uabalt z0.d, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabalt.nxv2i64( %a, + %b, + %c) + ret %out +} + +; +; UABDLB +; + +define @uabdlb_b( %a, %b) { +; CHECK-LABEL: uabdlb_b: +; CHECK: uabdlb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabdlb.nxv8i16( %a, + %b) + ret %out +} + +define @uabdlb_h( %a, %b) { +; CHECK-LABEL: uabdlb_h: +; CHECK: uabdlb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabdlb.nxv4i32( %a, + %b) + ret %out +} + +define @uabdlb_s( %a, %b) { +; CHECK-LABEL: uabdlb_s: +; CHECK: uabdlb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabdlb.nxv2i64( %a, + %b) + ret %out +} + +; +; UABDLT +; + +define @uabdlt_b( %a, %b) { +; CHECK-LABEL: uabdlt_b: +; CHECK: uabdlt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabdlt.nxv8i16( %a, + %b) + ret %out +} + +define @uabdlt_h( %a, %b) { +; CHECK-LABEL: uabdlt_h: +; CHECK: uabdlt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabdlt.nxv4i32( %a, + %b) + ret %out +} + +define @uabdlt_s( %a, %b) { +; CHECK-LABEL: uabdlt_s: +; CHECK: uabdlt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uabdlt.nxv2i64( %a, + %b) + ret %out +} + +; +; UADDLB +; + +define @uaddlb_b( %a, %b) { +; CHECK-LABEL: uaddlb_b: +; CHECK: uaddlb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddlb.nxv8i16( %a, + %b) + ret %out +} + +define @uaddlb_h( %a, %b) { +; CHECK-LABEL: uaddlb_h: +; CHECK: uaddlb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddlb.nxv4i32( %a, + %b) + ret %out +} + +define @uaddlb_s( %a, %b) { +; 
CHECK-LABEL: uaddlb_s: +; CHECK: uaddlb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddlb.nxv2i64( %a, + %b) + ret %out +} + +; +; UADDLT +; + +define @uaddlt_b( %a, %b) { +; CHECK-LABEL: uaddlt_b: +; CHECK: uaddlt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddlt.nxv8i16( %a, + %b) + ret %out +} + +define @uaddlt_h( %a, %b) { +; CHECK-LABEL: uaddlt_h: +; CHECK: uaddlt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddlt.nxv4i32( %a, + %b) + ret %out +} + +define @uaddlt_s( %a, %b) { +; CHECK-LABEL: uaddlt_s: +; CHECK: uaddlt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uaddlt.nxv2i64( %a, + %b) + ret %out +} + +; +; UMULLB (Vectors) +; + +define @umullb_b( %a, %b) { +; CHECK-LABEL: umullb_b: +; CHECK: umullb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullb.nxv8i16( %a, + %b) + ret %out +} + +define @umullb_h( %a, %b) { +; CHECK-LABEL: umullb_h: +; CHECK: umullb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullb.nxv4i32( %a, + %b) + ret %out +} + +define @umullb_s( %a, %b) { +; CHECK-LABEL: umullb_s: +; CHECK: umullb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullb.nxv2i64( %a, + %b) + ret %out +} + +; +; UMULLT (Vectors) +; + +define @umullt_b( %a, %b) { +; CHECK-LABEL: umullt_b: +; CHECK: umullt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullt.nxv8i16( %a, + %b) + ret %out +} + +define @umullt_h( %a, %b) { +; CHECK-LABEL: umullt_h: +; CHECK: umullt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullt.nxv4i32( %a, + %b) + ret %out +} + +define @umullt_s( %a, %b) { +; CHECK-LABEL: umullt_s: +; CHECK: umullt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.umullt.nxv2i64( %a, + %b) + ret %out +} + +; +; USUBLB +; + +define @usublb_b( %a, %b) { +; CHECK-LABEL: usublb_b: +; CHECK: usublb z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usublb.nxv8i16( %a, + %b) + ret %out +} + +define @usublb_h( %a, %b) { +; CHECK-LABEL: usublb_h: +; CHECK: usublb z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usublb.nxv4i32( %a, + %b) + ret %out +} + +define @usublb_s( %a, %b) { +; CHECK-LABEL: usublb_s: +; CHECK: usublb z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usublb.nxv2i64( %a, + %b) + ret %out +} + +; +; USUBLT +; + +define @usublt_b( %a, %b) { +; CHECK-LABEL: usublt_b: +; CHECK: usublt z0.h, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usublt.nxv8i16( %a, + %b) + ret %out +} + +define @usublt_h( %a, %b) { +; CHECK-LABEL: usublt_h: +; CHECK: usublt z0.s, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usublt.nxv4i32( %a, + %b) + ret %out +} + +define @usublt_s( %a, %b) { +; CHECK-LABEL: usublt_s: +; CHECK: usublt z0.d, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.usublt.nxv2i64( %a, + %b) + ret %out +} + +declare @llvm.aarch64.sve.sabalb.nxv8i16(, , ) +declare @llvm.aarch64.sve.sabalb.nxv4i32(, , ) +declare @llvm.aarch64.sve.sabalb.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sabalt.nxv8i16(, , ) +declare @llvm.aarch64.sve.sabalt.nxv4i32(, , ) +declare @llvm.aarch64.sve.sabalt.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sabdlb.nxv8i16(, ) +declare @llvm.aarch64.sve.sabdlb.nxv4i32(, ) +declare @llvm.aarch64.sve.sabdlb.nxv2i64(, ) + +declare @llvm.aarch64.sve.sabdlt.nxv8i16(, ) +declare @llvm.aarch64.sve.sabdlt.nxv4i32(, ) +declare 
@llvm.aarch64.sve.sabdlt.nxv2i64(, ) + +declare @llvm.aarch64.sve.saddlb.nxv8i16(, ) +declare @llvm.aarch64.sve.saddlb.nxv4i32(, ) +declare @llvm.aarch64.sve.saddlb.nxv2i64(, ) + +declare @llvm.aarch64.sve.saddlt.nxv8i16(, ) +declare @llvm.aarch64.sve.saddlt.nxv4i32(, ) +declare @llvm.aarch64.sve.saddlt.nxv2i64(, ) + +declare @llvm.aarch64.sve.smullb.nxv8i16(, ) +declare @llvm.aarch64.sve.smullb.nxv4i32(, ) +declare @llvm.aarch64.sve.smullb.nxv2i64(, ) + +declare @llvm.aarch64.sve.smullt.nxv8i16(, ) +declare @llvm.aarch64.sve.smullt.nxv4i32(, ) +declare @llvm.aarch64.sve.smullt.nxv2i64(, ) + +declare @llvm.aarch64.sve.sqdmullb.nxv8i16(, ) +declare @llvm.aarch64.sve.sqdmullb.nxv4i32(, ) +declare @llvm.aarch64.sve.sqdmullb.nxv2i64(, ) + +declare @llvm.aarch64.sve.sqdmullt.nxv8i16(, ) +declare @llvm.aarch64.sve.sqdmullt.nxv4i32(, ) +declare @llvm.aarch64.sve.sqdmullt.nxv2i64(, ) + +declare @llvm.aarch64.sve.ssublb.nxv8i16(, ) +declare @llvm.aarch64.sve.ssublb.nxv4i32(, ) +declare @llvm.aarch64.sve.ssublb.nxv2i64(, ) + +declare @llvm.aarch64.sve.ssublt.nxv8i16(, ) +declare @llvm.aarch64.sve.ssublt.nxv4i32(, ) +declare @llvm.aarch64.sve.ssublt.nxv2i64(, ) + +declare @llvm.aarch64.sve.uabalb.nxv8i16(, , ) +declare @llvm.aarch64.sve.uabalb.nxv4i32(, , ) +declare @llvm.aarch64.sve.uabalb.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uabalt.nxv8i16(, , ) +declare @llvm.aarch64.sve.uabalt.nxv4i32(, , ) +declare @llvm.aarch64.sve.uabalt.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uabdlb.nxv8i16(, ) +declare @llvm.aarch64.sve.uabdlb.nxv4i32(, ) +declare @llvm.aarch64.sve.uabdlb.nxv2i64(, ) + +declare @llvm.aarch64.sve.uabdlt.nxv8i16(, ) +declare @llvm.aarch64.sve.uabdlt.nxv4i32(, ) +declare @llvm.aarch64.sve.uabdlt.nxv2i64(, ) + +declare @llvm.aarch64.sve.uaddlb.nxv8i16(, ) +declare @llvm.aarch64.sve.uaddlb.nxv4i32(, ) +declare @llvm.aarch64.sve.uaddlb.nxv2i64(, ) + +declare @llvm.aarch64.sve.uaddlt.nxv8i16(, ) +declare @llvm.aarch64.sve.uaddlt.nxv4i32(, ) +declare @llvm.aarch64.sve.uaddlt.nxv2i64(, ) + +declare @llvm.aarch64.sve.umullb.nxv8i16(, ) +declare @llvm.aarch64.sve.umullb.nxv4i32(, ) +declare @llvm.aarch64.sve.umullb.nxv2i64(, ) + +declare @llvm.aarch64.sve.umullt.nxv8i16(, ) +declare @llvm.aarch64.sve.umullt.nxv4i32(, ) +declare @llvm.aarch64.sve.umullt.nxv2i64(, ) + +declare @llvm.aarch64.sve.usublb.nxv8i16(, ) +declare @llvm.aarch64.sve.usublb.nxv4i32(, ) +declare @llvm.aarch64.sve.usublb.nxv2i64(, ) + +declare @llvm.aarch64.sve.usublt.nxv8i16(, ) +declare @llvm.aarch64.sve.usublt.nxv4i32(, ) +declare @llvm.aarch64.sve.usublt.nxv2i64(, )
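
Note (illustrative only, not part of the patch): the "b"/"t" suffixes of these intrinsics select the even-numbered ("bottom") and odd-numbered ("top") elements of the narrower source vectors, and each result element is twice the source element width. Below is a minimal IR sketch of how one such pair could be used together, assuming the nxv8i16 forms take two nxv16i8 operands as in the declarations above; the function and value names here are invented for illustration.

; Widen and add both the even-numbered and the odd-numbered byte elements of
; %a and %b, assuming saddlb/saddlt behave as described above.
define void @widen_add_example(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
                               <vscale x 8 x i16>* %lo, <vscale x 8 x i16>* %hi) {
  %bottom = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b)
  %top    = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b)
  store <vscale x 8 x i16> %bottom, <vscale x 8 x i16>* %lo
  store <vscale x 8 x i16> %top, <vscale x 8 x i16>* %hi
  ret void
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)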