diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1053,12 +1053,31 @@
                  LLVMVectorOfBitcastsToInt<0>],
                 [IntrNoMem]>;
 
+  class SVE2_1VectorArg_Long_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMSubdivide2VectorType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<1>]>;
+
   class SVE2_2VectorArg_Long_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMSubdivide2VectorType<0>,
                  LLVMSubdivide2VectorType<0>],
                 [IntrNoMem]>;
 
+  class SVE2_2VectorArgIndexed_Long_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMSubdivide2VectorType<0>,
+                 LLVMSubdivide2VectorType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<2>]>;
+
+  class SVE2_2VectorArg_Wide_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMSubdivide2VectorType<0>],
+                [IntrNoMem]>;
+
   class SVE2_2VectorArg_Pred_Long_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1766,28 +1785,34 @@
 // SVE2 - Widening DSP operations
 //
 
-def int_aarch64_sve_sabalb : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sabalt : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sabdlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sabdlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_saddlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_saddlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_smullb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_smullt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sqdmullb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_sqdmullt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_ssublb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_ssublt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabalb : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabalt : SVE2_3VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabdlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uabdlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uaddlb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_uaddlt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_umullb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_umullt : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_usublb : SVE2_2VectorArg_Long_Intrinsic;
-def int_aarch64_sve_usublt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabalb : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabalt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabdlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sabdlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_saddwb : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_saddwt : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_sshllb : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sshllt : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssubwb : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_ssubwt : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_uabalb : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabalt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabdlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uabdlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddlb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddlt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_uaddwb : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_uaddwt : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_ushllb : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ushllt : SVE2_1VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usublb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usublt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_usubwb : SVE2_2VectorArg_Wide_Intrinsic;
+def int_aarch64_sve_usubwt : SVE2_2VectorArg_Wide_Intrinsic;
 
 //
 // SVE2 - Non-widening pairwise arithmetic
@@ -1933,10 +1958,16 @@
 def int_aarch64_sve_smlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_umlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_umlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_smullb_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_smullt_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_umullb_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_umullt_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
 def int_aarch64_sve_sqdmlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_sqdmlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_sqdmlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 def int_aarch64_sve_sqdmlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_sqdmullb_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
+def int_aarch64_sve_sqdmullt_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
 
 // SVE2 MLA Unpredicated.
 def int_aarch64_sve_smlalb : SVE2_3VectorArg_Long_Intrinsic;
@@ -1947,11 +1978,17 @@
 def int_aarch64_sve_smlslt : SVE2_3VectorArg_Long_Intrinsic;
 def int_aarch64_sve_umlslb : SVE2_3VectorArg_Long_Intrinsic;
 def int_aarch64_sve_umlslt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_smullb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_smullt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_umullb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_umullt : SVE2_2VectorArg_Long_Intrinsic;
 def int_aarch64_sve_sqdmlalb : SVE2_3VectorArg_Long_Intrinsic;
 def int_aarch64_sve_sqdmlalt : SVE2_3VectorArg_Long_Intrinsic;
 def int_aarch64_sve_sqdmlslb : SVE2_3VectorArg_Long_Intrinsic;
 def int_aarch64_sve_sqdmlslt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sqdmullb : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sqdmullt : SVE2_2VectorArg_Long_Intrinsic;
 
 def int_aarch64_sve_sqdmlalbt : SVE2_3VectorArg_Long_Intrinsic;
 def int_aarch64_sve_sqdmlslbt : SVE2_3VectorArg_Long_Intrinsic;
 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1475,14 +1475,14 @@
   defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_x>;
 
   // SVE2 integer multiply long (indexed)
-  defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">;
-  defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt">;
-  defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb">;
-  defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt">;
+  defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb", int_aarch64_sve_smullb_lane>;
+  defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt", int_aarch64_sve_smullt_lane>;
+  defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb", int_aarch64_sve_umullb_lane>;
+  defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt", int_aarch64_sve_umullt_lane>;
 
   // SVE2 saturating multiply (indexed)
-  defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb">;
-  defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt">;
+  defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb", int_aarch64_sve_sqdmullb_lane>;
+  defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt", int_aarch64_sve_sqdmullt_lane>;
 
   // SVE2 integer multiply-add long (indexed)
   defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb", int_aarch64_sve_smlalb_lane>;
@@ -1593,14 +1593,14 @@
   defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>;
 
   // SVE2 integer add/subtract wide
-  defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">;
-  defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt">;
-  defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb">;
-  defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt">;
-  defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb">;
-  defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt">;
-  defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb">;
-  defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">;
+  defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", int_aarch64_sve_saddwb>;
+  defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", int_aarch64_sve_saddwt>;
+  defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", int_aarch64_sve_uaddwb>;
+  defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", int_aarch64_sve_uaddwt>;
+  defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb", int_aarch64_sve_ssubwb>;
+  defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt", int_aarch64_sve_ssubwt>;
+  defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb", int_aarch64_sve_usubwb>;
+  defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt", int_aarch64_sve_usubwt>;
 
   // SVE2 integer multiply long
   defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb", int_aarch64_sve_sqdmullb>;
@@ -1693,10 +1693,10 @@
   defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb">;
 
   // SVE2 bitwise shift left long
-  defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb">;
-  defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt">;
-  defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb">;
-  defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
+  defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb", int_aarch64_sve_sshllb>;
+  defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt", int_aarch64_sve_sshllt>;
+  defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb", int_aarch64_sve_ushllb>;
+  defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt", int_aarch64_sve_ushllt>;
 
   // SVE2 integer add/subtract interleaved long
   defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>;
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2731,9 +2731,10 @@
   def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, VectorIndexD32b, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm> {
+multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm,
+                                             SDPatternOperator op> {
   def _S : sve2_int_mul_by_indexed_elem<0b10, { opc{2-1}, ?, opc{0} }, asm,
-                                        ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+                                        ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> {
     bits<3> Zm;
     bits<3> iop;
     let Inst{20-19} = iop{2-1};
@@ -2741,13 +2742,16 @@
     let Inst{11} = iop{0};
   }
   def _D : sve2_int_mul_by_indexed_elem<0b11, { opc{2-1}, ?, opc{0} }, asm,
-                                        ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+                                        ZPR64, ZPR32, ZPR4b32, VectorIndexS32b> {
     bits<4> Zm;
     bits<2> iop;
     let Inst{20} = iop{1};
     let Inst{19-16} = Zm;
     let Inst{11} = iop{0};
   }
+
+  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv8i16, nxv8i16, i32, VectorIndexH32b, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv4i32, nxv4i32, i32, VectorIndexS32b, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2894,10 +2898,15 @@
   def : SVE_2_Op_Pat<nxv2i64, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm> {
+multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm,
+                                    SDPatternOperator op> {
   def _H : sve2_wide_int_arith<0b01, { 0b10, opc }, asm, ZPR16, ZPR16, ZPR8>;
   def _S : sve2_wide_int_arith<0b10, { 0b10, opc }, asm, ZPR32, ZPR32, ZPR16>;
   def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>;
+
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve2_pmul_long<bits<1> opc, string asm> {
@@ -2992,7 +3001,8 @@
     let Inst{4-0} = Zd;
   }
 
-multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
+multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
+                                        SDPatternOperator op> {
   def _H : sve2_bitwise_shift_left_long<{0,0,1}, opc, asm,
                                         ZPR16, ZPR8, vecshiftL8>;
   def _S : sve2_bitwise_shift_left_long<{0,1,?}, opc, asm,
@@ -3003,6 +3013,9 @@
                                         ZPR64, ZPR32, vecshiftL32> {
     let Inst{20-19} = imm{4-3};
   }
+  def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8,  !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll
@@ -193,6 +193,69 @@
 }
 
 ;
+; SADDWB
+;
+
+define <vscale x 8 x i16> @saddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: saddwb_b:
+; CHECK: saddwb z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @saddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: saddwb_h:
+; CHECK: saddwb z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @saddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: saddwb_s:
+; CHECK: saddwb z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SADDWT
+;
+
+define <vscale x 8 x i16> @saddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: saddwt_b:
+; CHECK: saddwt z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @saddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: saddwt_h:
+; CHECK: saddwt z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @saddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: saddwt_s:
+; CHECK: saddwt z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+
+;
 ; SMULLB (Vectors)
 ;
@@ -224,6 +287,30 @@
 }
 
 ;
+; SMULLB (Indexed)
+;
+
+define <vscale x 4 x i32> @smullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smullb_lane_h:
+; CHECK: smullb z0.s, z0.h, z1.h[4]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16> %a,
+              <vscale x 8 x i16> %b,
+              i32 4)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smullb_lane_s:
+; CHECK: smullb z0.d, z0.s, z1.s[3]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32> %a,
+              <vscale x 4 x i32> %b,
+              i32 3)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; SMULLT (Vectors)
 ;
@@ -255,6 +342,30 @@
 }
 
 ;
+; SMULLT (Indexed)
+;
+
+define <vscale x 4 x i32> @smullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smullt_lane_h:
+; CHECK: smullt z0.s, z0.h, z1.h[5]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16> %a,
+              <vscale x 8 x i16> %b,
+              i32 5)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smullt_lane_s:
+; CHECK: smullt z0.d, z0.s, z1.s[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32> %a,
+              <vscale x 4 x i32> %b,
+              i32 2)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; SQDMULLB (Vectors)
 ;
@@ -286,6 +397,30 @@
 }
 
 ;
+; SQDMULLB (Indexed)
+;
+
+define <vscale x 4 x i32> @sqdmullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqdmullb_lane_h:
+; CHECK: sqdmullb z0.s, z0.h, z1.h[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16> %a,
+              <vscale x 8 x i16> %b,
+              i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqdmullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqdmullb_lane_s:
+; CHECK: sqdmullb z0.d, z0.s, z1.s[1]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32> %a,
+              <vscale x 4 x i32> %b,
+              i32 1)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; SQDMULLT (Vectors)
 ;
@@ -317,6 +452,30 @@
 }
 
 ;
+; SQDMULLT (Indexed)
+;
+
+define <vscale x 4 x i32> @sqdmullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqdmullt_lane_h:
+; CHECK: sqdmullt z0.s, z0.h, z1.h[3]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16> %a,
+              <vscale x 8 x i16> %b,
+              i32 3)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqdmullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqdmullt_lane_s:
+; CHECK: sqdmullt z0.d, z0.s, z1.s[0]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32> %a,
+              <vscale x 4 x i32> %b,
+              i32 0)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; SSUBLB
 ;
@@ -348,6 +507,62 @@
 }
 
 ;
+; SSHLLB
+;
+
+define <vscale x 8 x i16> @sshllb_b(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sshllb_b:
+; CHECK: sshllb z0.h, z0.b, #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8> %a, i32 0)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sshllb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sshllb_h:
+; CHECK: sshllb z0.s, z0.h, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16> %a, i32 1)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sshllb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sshllb_s:
+; CHECK: sshllb z0.d, z0.s, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32> %a, i32 2)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SSHLLT
+;
+
+define <vscale x 8 x i16> @sshllt_b(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sshllt_b:
+; CHECK: sshllt z0.h, z0.b, #3
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8> %a, i32 3)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sshllt_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sshllt_h:
+; CHECK: sshllt z0.s, z0.h, #4
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16> %a, i32 4)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sshllt_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sshllt_s:
+; CHECK: sshllt z0.d, z0.s, #5
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32> %a, i32 5)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; SSUBLT
 ;
@@ -379,6 +594,68 @@
 }
 
 ;
+; SSUBWB
+;
+
+define <vscale x 8 x i16> @ssubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ssubwb_b:
+; CHECK: ssubwb z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ssubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ssubwb_h:
+; CHECK: ssubwb z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ssubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ssubwb_s:
+; CHECK: ssubwb z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SSUBWT
+;
+
+define <vscale x 8 x i16> @ssubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ssubwt_b:
+; CHECK: ssubwt z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ssubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ssubwt_h:
+; CHECK: ssubwt z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ssubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ssubwt_s:
+; CHECK: ssubwt z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; UABALB
 ;
@@ -571,6 +848,68 @@
 }
 
 ;
+; UADDWB
+;
+
+define <vscale x 8 x i16> @uaddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uaddwb_b:
+; CHECK: uaddwb z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uaddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uaddwb_h:
+; CHECK: uaddwb z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uaddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uaddwb_s:
+; CHECK: uaddwb z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UADDWT
+;
+
+define <vscale x 8 x i16> @uaddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uaddwt_b:
+; CHECK: uaddwt z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uaddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uaddwt_h:
+; CHECK: uaddwt z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uaddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uaddwt_s:
+; CHECK: uaddwt z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; UMULLB (Vectors)
 ;
@@ -602,6 +941,31 @@
 }
 
 ;
+; UMULLB (Indexed)
+;
+
+define <vscale x 4 x i32> @umullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umullb_lane_h:
+; CHECK: umullb z0.s, z0.h, z1.h[0]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16> %a,
+              <vscale x 8 x i16> %b,
+              i32 0)
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 2 x i64> @umullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umullb_lane_s:
+; CHECK: umullb z0.d, z0.s, z1.s[3]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32> %a,
+              <vscale x 4 x i32> %b,
+              i32 3)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; UMULLT (Vectors)
 ;
@@ -633,6 +997,86 @@
 }
 
 ;
+; UMULLT (Indexed)
+;
+
+define <vscale x 4 x i32> @umullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umullt_lane_h:
+; CHECK: umullt z0.s, z0.h, z1.h[1]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16> %a,
+              <vscale x 8 x i16> %b,
+              i32 1)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umullt_lane_s:
+; CHECK: umullt z0.d, z0.s, z1.s[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32> %a,
+              <vscale x 4 x i32> %b,
+              i32 2)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; USHLLB
+;
+
+define <vscale x 8 x i16> @ushllb_b(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: ushllb_b:
+; CHECK: ushllb z0.h, z0.b, #6
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8> %a, i32 6)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ushllb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: ushllb_h:
+; CHECK: ushllb z0.s, z0.h, #7
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16> %a, i32 7)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ushllb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: ushllb_s:
+; CHECK: ushllb z0.d, z0.s, #8
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32> %a, i32 8)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; USHLLT
+;
+
+define <vscale x 8 x i16> @ushllt_b(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: ushllt_b:
+; CHECK: ushllt z0.h, z0.b, #7
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8> %a, i32 7)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ushllt_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: ushllt_h:
+; CHECK: ushllt z0.s, z0.h, #15
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16> %a, i32 15)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ushllt_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: ushllt_s:
+; CHECK: ushllt z0.d, z0.s, #31
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32> %a, i32 31)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; USUBLB
 ;
@@ -694,6 +1138,68 @@
   ret <vscale x 2 x i64> %out
 }
 
+;
+; USUBWB
+;
+
+define <vscale x 8 x i16> @usubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: usubwb_b:
+; CHECK: usubwb z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @usubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: usubwb_h:
+; CHECK: usubwb z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @usubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: usubwb_s:
+; CHECK: usubwb z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; USUBWT
+;
+
+define <vscale x 8 x i16> @usubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: usubwt_b:
+; CHECK: usubwt z0.h, z0.h, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16> %a,
+              <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @usubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: usubwt_h:
+; CHECK: usubwt z0.s, z0.s, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32> %a,
+              <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @usubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: usubwt_s:
+; CHECK: usubwt z0.d, z0.d, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64> %a,
+              <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -718,22 +1224,50 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32>, i32)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -742,6 +1276,14 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -766,14 +1308,36 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
 
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32>, i32)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -781,3 +1345,11 @@
 declare <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)