Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1047,6 +1047,12 @@
                  LLVMSubdivide2VectorType<0>],
                 [IntrNoMem]>;
 
+  class SVE2_2VectorArg_Long_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMSubdivide2VectorType<0>,
+                 LLVMSubdivide2VectorType<0>],
+                [IntrNoMem]>;
+
   class SVE2_3VectorArg_Long_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
@@ -1103,6 +1109,21 @@
                  llvm_i64_ty],
                 [IntrNoMem, ImmArg<3>]>;
 
+  class SVE2_CADD_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class SVE2_CMLA_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
   // NOTE: There is no relationship between these intrinsics beyond an attempt
   //       to reuse currently identical class definitions.
   class AdvSIMD_SVE_LOGB_Intrinsic  : AdvSIMD_SVE_CNT_Intrinsic;
@@ -1719,6 +1740,27 @@
 def int_aarch64_sve_uadalp    : SVE2_2VectorArg_Pred_Long_Intrinsic;
 
 //
+// SVE2 - Uniform complex integer arithmetic
+//
+
+def int_aarch64_sve_cadd_x           : SVE2_CADD_Intrinsic;
+def int_aarch64_sve_sqcadd_x         : SVE2_CADD_Intrinsic;
+def int_aarch64_sve_cmla_x           : SVE2_CMLA_Intrinsic;
+def int_aarch64_sve_cmla_lane_x      : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
+def int_aarch64_sve_sqrdcmlah_x      : SVE2_CMLA_Intrinsic;
+def int_aarch64_sve_sqrdcmlah_lane_x : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
+
+//
+// SVE2 - Widening complex integer arithmetic
+//
+
+def int_aarch64_sve_saddlbt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sqdmlalbt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_sqdmlslbt : SVE2_3VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublbt   : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssubltb   : SVE2_2VectorArg_Long_Intrinsic;
+
+//
 // SVE2 - Floating-point widening multiply-accumulate
 //
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1448,13 +1448,13 @@
   defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
 
   // SVE2 complex integer multiply-add (indexed)
-  defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla">;
+  defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla", int_aarch64_sve_cmla_lane_x>;
 
   // SVE2 complex saturating multiply-add (indexed)
-  defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah">;
+  defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_lane_x>;
 
   // SVE2 complex integer multiply-add
-  defm CMLA_ZZZ      : sve2_int_cmla<0b0, "cmla">;
-  defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah">;
+  defm CMLA_ZZZ      : sve2_int_cmla<0b0, "cmla", int_aarch64_sve_cmla_x>;
+  defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_x>;
 
   // SVE2 integer multiply long (indexed)
   defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">;
@@ -1477,14 +1477,14 @@
   defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt", int_aarch64_sve_umlslt>;
 
   // SVE2 integer multiply-add long (vectors, unpredicated)
-  defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb">;
-  defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt">;
-  defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb">;
-  defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt">;
-  defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb">;
-  defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt">;
-  defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb">;
-  defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt">;
+  defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb", null_frag>;
+  defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt", null_frag>;
+  defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb", null_frag>;
+  defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt", null_frag>;
+  defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb", null_frag>;
+  defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt", null_frag>;
+  defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb", null_frag>;
+  defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt", null_frag>;
 
   // SVE2 saturating multiply-add long (indexed)
   defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb", null_frag>;
@@ -1493,14 +1493,14 @@
   defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt", null_frag>;
 
   // SVE2 saturating multiply-add long (vectors, unpredicated)
-  defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb">;
-  defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt">;
-  defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb">;
-  defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt">;
+  defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb", null_frag>;
+  defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt", null_frag>;
+  defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb", null_frag>;
+  defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt", null_frag>;
 
   // SVE2 saturating multiply-add interleaved long
-  defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt">;
-  defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
+  defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt", int_aarch64_sve_sqdmlalbt>;
+  defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt", int_aarch64_sve_sqdmlslbt>;
 
   // SVE2 integer halving add/subtract (predicated)
   defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", int_aarch64_sve_shadd>;
@@ -1605,8 +1605,8 @@
   defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
 
   // SVE2 complex integer add
-  defm CADD_ZZI   : sve2_int_cadd<0b0, "cadd">;
-  defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
+  defm CADD_ZZI   : sve2_int_cadd<0b0, "cadd", int_aarch64_sve_cadd_x>;
+  defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>;
 
   // SVE2 integer absolute difference and accumulate
   defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
@@ -1681,9 +1681,9 @@
   defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
 
   // SVE2 integer add/subtract interleaved long
-  defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt">;
-  defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
-  defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
+  defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>;
+  defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>;
+  defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>;
 
   // SVE2 histogram generation (segment)
   def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg">;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2352,10 +2352,14 @@
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve2_int_mla_long<bits<5> opc, string asm> {
+multiclass sve2_int_mla_long<bits<5> opc, string asm, SDPatternOperator op> {
   def _H : sve2_int_mla<0b01, opc, asm, ZPR16, ZPR8>;
   def _S : sve2_int_mla<0b10, opc, asm, ZPR32, ZPR16>;
   def _D : sve2_int_mla<0b11, opc, asm, ZPR64, ZPR32>;
+
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2547,11 +2551,16 @@
 // SVE2 Complex Multiply-Add Group
 //===----------------------------------------------------------------------===//
 
-multiclass sve2_int_cmla<bit opc, string asm> {
+multiclass sve2_int_cmla<bit opc, string asm, SDPatternOperator op> {
   def _B : sve2_complex_int_arith<0b00, { 0b001, opc }, asm, ZPR8, ZPR8>;
   def _H : sve2_complex_int_arith<0b01, { 0b001, opc }, asm, ZPR16, ZPR16>;
   def _S : sve2_complex_int_arith<0b10, { 0b001, opc }, asm, ZPR32, ZPR32>;
   def _D : sve2_complex_int_arith<0b11, { 0b001, opc }, asm, ZPR64, ZPR64>;
+
+  def : SVE_4_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, i32, complexrotateop, !cast<Instruction>(NAME # _B)>;
+  def : SVE_4_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, i32, complexrotateop, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, i32, complexrotateop, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, i32, complexrotateop, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2599,19 +2608,28 @@
 // SVE2 Complex Multiply-Add - Indexed Group
 //===----------------------------------------------------------------------===//
 
-multiclass sve2_cmla_by_indexed_elem<bit opc, string asm> {
-  def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS> {
+multiclass sve2_cmla_by_indexed_elem<bit opc, string asm,
+                                     SDPatternOperator op> {
+  def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS32b> {
     bits<2> iop;
     bits<3> Zm;
     let Inst{20-19} = iop;
     let Inst{18-16} = Zm;
   }
-  def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD> {
+  def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD32b> {
    bit iop;
    bits<4> Zm;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv8i16 (op (nxv8i16 ZPR16:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+                         (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # "_H") ZPR16:$Op1, ZPR16:$Op2, ZPR16:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv4i32 ZPR32:$Op2), (nxv4i32 ZPR32:$Op3),
+                         (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # "_S") ZPR32:$Op1, ZPR32:$Op2, ZPR32:$Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2896,10 +2914,15 @@
   def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
 }
 
-multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
+multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm,
+                                                 SDPatternOperator op> {
   def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
   def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
   def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
@@ -3073,11 +3096,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_int_cadd<bit opc, string asm> {
+multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
   def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>;
   def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>;
   def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>;
   def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>;
+
+  def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, complexrotateopodd, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, complexrotateopodd, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, complexrotateopodd, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, complexrotateopodd, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
@@ -0,0 +1,267 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; CADD
+;
+
+define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cadd_b:
+; CHECK: cadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 90)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cadd_h:
+; CHECK: cadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cadd_s:
+; CHECK: cadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cadd_d:
+; CHECK: cadd z0.d, z0.d, z1.d, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 270)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQCADD
+;
+
+define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqcadd_b:
+; CHECK: sqcadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 90)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqcadd_h:
+; CHECK: sqcadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqcadd_s:
+; CHECK: sqcadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqcadd_d:
+; CHECK: sqcadd z0.d, z0.d, z1.d, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 270)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; CMLA
+;
+
+define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: cmla_b:
+; CHECK: cmla z0.b, z1.b, z2.b, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 90)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: cmla_h:
+; CHECK: cmla z0.h, z1.h, z2.h, #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 180)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: cmla_s:
+; CHECK: cmla z0.s, z1.s, z2.s, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: cmla_d:
+; CHECK: cmla z0.d, z1.d, z2.d, #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, i32 0)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; CMLA_LANE
+;
+
+define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: cmla_lane_h:
+; CHECK: cmla z0.h, z1.h, z2.h[1], #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 1, i32 180)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: cmla_lane_s:
+; CHECK: cmla z0.s, z1.s, z2.s[0], #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 0, i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; QRDCMLAH
+;
+
+define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sqrdcmlah_b:
+; CHECK: sqrdcmlah z0.b, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sqrdcmlah_h:
+; CHECK: sqrdcmlah z0.h, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: sqrdcmlah_s:
+; CHECK: sqrdcmlah z0.s, z1.s, z2.s, #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 180)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: sqrdcmlah_d:
+; CHECK: sqrdcmlah z0.d, z1.d, z2.d, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, i32 270)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; QRDCMLAH_LANE
+;
+
+define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sqrdcmlah_lane_h:
+; CHECK: sqrdcmlah z0.h, z1.h, z2.h[1], #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 1, i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrdcmlah_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: sqrdcmlah_lane_s:
+; CHECK: sqrdcmlah z0.s, z1.s, z2.s[0], #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 0, i32 180)
+  ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADDLBT
+;
+
+define <vscale x 8 x i16> @saddlbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: saddlbt_b:
+; CHECK: saddlbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlbt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @saddlbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: saddlbt_h:
+; CHECK: saddlbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlbt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @saddlbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: saddlbt_s:
+; CHECK: saddlbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlbt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQDMLALBT
+;
+
+define <vscale x 8 x i16> @sqdmlalbt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sqdmlalbt_b:
+; CHECK: sqdmlalbt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmlalbt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqdmlalbt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sqdmlalbt_h:
+; CHECK: sqdmlalbt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmlalbt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqdmlalbt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: sqdmlalbt_s:
+; CHECK: sqdmlalbt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmlalbt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQDMLSLBT
+;
+
+define <vscale x 8 x i16> @sqdmlslbt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sqdmlslbt_b:
+; CHECK: sqdmlslbt z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmlslbt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqdmlslbt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sqdmlslbt_h:
+; CHECK: sqdmlslbt z0.s, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmlslbt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqdmlslbt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: sqdmlslbt_s:
+; CHECK: sqdmlslbt z0.d, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmlslbt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SSUBLBT
+;
+
+define <vscale x 8 x i16> @ssublbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ssublbt_b:
+; CHECK: ssublbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublbt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ssublbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ssublbt_h:
+; CHECK: ssublbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublbt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ssublbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ssublbt_s:
+; CHECK: ssublbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublbt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SSUBLTB
+;
+
+define <vscale x 8 x i16> @ssubltb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ssubltb_b:
+; CHECK: ssubltb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubltb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ssubltb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ssubltb_h:
+; CHECK: ssubltb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubltb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ssubltb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ssubltb_s:
+; CHECK: ssubltb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubltb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlbt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlbt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlbt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmlalbt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmlalbt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmlalbt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmlslbt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmlslbt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmlslbt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublbt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublbt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublbt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubltb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubltb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubltb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
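
Note (not part of the patch): the llvm.aarch64.sve.* intrinsics above are the IR-level hooks for the corresponding SVE2 ACLE builtins. The C sketch below is a hedged illustration only; the arm_sve.h entry points (svcadd, svcmla_lane) and the availability of the matching Clang builtins are assumptions about the frontend, not something this diff adds.

  // Hypothetical usage sketch; only the IR intrinsics and TableGen patterns
  // above are introduced by this patch.
  #include <arm_sve.h>

  // Complex add with a 90 degree rotation; expected to map onto
  // @llvm.aarch64.sve.cadd.x.nxv4i32 and then "cadd z0.s, z0.s, z1.s, #90".
  svint32_t rot_add(svint32_t a, svint32_t b) {
    return svcadd(a, b, 90);
  }

  // Indexed complex multiply-accumulate; expected to map onto
  // @llvm.aarch64.sve.cmla.lane.x.nxv8i16 and then "cmla z0.h, z1.h, z2.h[1], #180".
  svint16_t cmla_idx(svint16_t acc, svint16_t a, svint16_t b) {
    return svcmla_lane(acc, a, b, 1, 180);
  }

The rotation immediate is 90 or 270 for cadd/sqcadd (complexrotateopodd in the patterns above) and 0, 90, 180 or 270 for cmla/sqrdcmlah (complexrotateop), matching the tests in this patch.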