diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -928,7 +928,7 @@
                  LLVMMatchType<0>,
                  LLVMMatchType<0>,
                  llvm_i32_ty],
-                [IntrNoMem]>;
+                [IntrNoMem, ImmArg<3>]>;
 
   class AdvSIMD_SVE_CMLA_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
@@ -937,7 +937,7 @@
                  LLVMMatchType<0>,
                  LLVMMatchType<0>,
                  LLVMMatchType<0>,
                  llvm_i32_ty],
-                [IntrNoMem]>;
+                [IntrNoMem, ImmArg<4>]>;
   class AdvSIMD_SVE_CMLA_LANE_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
@@ -946,7 +946,7 @@
                  LLVMMatchType<0>,
                  llvm_i32_ty,
                  llvm_i32_ty],
-                [IntrNoMem, ImmArg<3>]>;
+                [IntrNoMem, ImmArg<3>, ImmArg<4>]>;
 
   class AdvSIMD_SVE_EXPA_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
@@ -1112,6 +1112,8 @@
   // NOTE: There is no relationship between these intrinsics beyond an attempt
   // to reuse currently identical class definitions.
   class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
+  class AdvSIMD_SVE2_CADD_Intrinsic : AdvSIMD_2VectorArgIndexed_Intrinsic;
+  class AdvSIMD_SVE2_CMLA_Intrinsic : AdvSIMD_3VectorArgIndexed_Intrinsic;
 
   // This class of intrinsics are not intended to be useful within LLVM IR but
   // are instead here to support some of the more regid parts of the ACLE.
@@ -1777,6 +1779,25 @@
 def int_aarch64_sve_uadalp : SVE2_2VectorArg_Pred_Long_Intrinsic;
 
 //
+// SVE2 - Uniform complex integer arithmetic
+//
+
+def int_aarch64_sve_cadd_x : AdvSIMD_SVE2_CADD_Intrinsic;
+def int_aarch64_sve_sqcadd_x : AdvSIMD_SVE2_CADD_Intrinsic;
+def int_aarch64_sve_cmla_x : AdvSIMD_SVE2_CMLA_Intrinsic;
+def int_aarch64_sve_cmla_lane_x : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
+def int_aarch64_sve_sqrdcmlah_x : AdvSIMD_SVE2_CMLA_Intrinsic;
+def int_aarch64_sve_sqrdcmlah_lane_x : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
+
+//
+// SVE2 - Widening complex integer arithmetic
+//
+
+def int_aarch64_sve_saddlbt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssublbt : SVE2_2VectorArg_Long_Intrinsic;
+def int_aarch64_sve_ssubltb : SVE2_2VectorArg_Long_Intrinsic;
+
+//
 // SVE2 - Floating-point widening multiply-accumulate
 //
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10277,15 +10277,15 @@
   let DiagnosticType = "InvalidComplexRotation" # Type;
   let Name = "ComplexRotation" # Type;
 }
-def complexrotateop : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270;  }],
-    SDNodeXForm<imm, [{
+def complexrotateop : Operand<i32>, TImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270;  }],
+    SDNodeXForm<timm, [{
   return CurDAG->getTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i32);
 }]>> {
   let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
   let PrintMethod = "printComplexRotationOp<90, 0>";
 }
-def complexrotateopodd : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270;  }],
-    SDNodeXForm<imm, [{
+def complexrotateopodd : Operand<i32>, TImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270;  }],
+    SDNodeXForm<timm, [{
   return CurDAG->getTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i32);
 }]>> {
   let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1448,13 +1448,13 @@
   defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
 
   // SVE2 complex integer multiply-add (indexed)
-  defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla">;
+  defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla", int_aarch64_sve_cmla_lane_x>;
   // SVE2 complex saturating multiply-add (indexed)
-  defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah">;
+  defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_lane_x>;
 
   // SVE2 complex integer multiply-add
-  defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla">;
-  defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah">;
+  defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla", int_aarch64_sve_cmla_x>;
+  defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_x>;
 
   // SVE2 integer multiply long (indexed)
   defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">;
@@ -1605,8 +1605,8 @@
   defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>;
 
   // SVE2 complex integer add
-  defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
-  defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
+  defm CADD_ZZI : sve2_int_cadd<0b0, "cadd", int_aarch64_sve_cadd_x>;
+  defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>;
 
   // SVE2 integer absolute difference and accumulate
   defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>;
@@ -1681,9 +1681,9 @@
   defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
 
   // SVE2 integer add/subtract interleaved long
-  defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt">;
-  defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
-  defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
+  defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>;
+  defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>;
+  defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>;
 
   // SVE2 histogram generation (segment)
   def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg", int_aarch64_sve_histseg>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2551,11 +2551,16 @@
 // SVE2 Complex Multiply-Add Group
 //===----------------------------------------------------------------------===//
 
-multiclass sve2_int_cmla<bit opc, string asm> {
+multiclass sve2_int_cmla<bit opc, string asm, SDPatternOperator op> {
   def _B : sve2_complex_int_arith<0b00, { 0b001, opc }, asm, ZPR8, ZPR8>;
   def _H : sve2_complex_int_arith<0b01, { 0b001, opc }, asm, ZPR16, ZPR16>;
   def _S : sve2_complex_int_arith<0b10, { 0b001, opc }, asm, ZPR32, ZPR32>;
   def _D : sve2_complex_int_arith<0b11, { 0b001, opc }, asm, ZPR64, ZPR64>;
+
+  def : SVE_4_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, i32, complexrotateop, !cast<Instruction>(NAME # _B)>;
+  def : SVE_4_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, i32, complexrotateop, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, i32, complexrotateop, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, i32, complexrotateop, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2603,19 +2608,28 @@
 // SVE2 Complex Multiply-Add - Indexed Group
 //===----------------------------------------------------------------------===//
 
-multiclass sve2_cmla_by_indexed_elem<bit opc, string asm> {
-  def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS> {
+multiclass sve2_cmla_by_indexed_elem<bit opc, string asm,
+                                     SDPatternOperator op> {
+  def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS32b> {
     bits<2> iop;
     bits<3> Zm;
     let Inst{20-19} = iop;
    let Inst{18-16} = Zm;
   }
-  def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD> {
+  def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD32b> {
     bit iop;
     bits<4> Zm;
     let Inst{20} = iop;
     let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv8i16 (op (nxv8i16 ZPR16:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+                         (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # "_H") ZPR16:$Op1, ZPR16:$Op2, ZPR16:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv4i32 ZPR32:$Op2), (nxv4i32 ZPR32:$Op3),
+                         (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # "_S") ZPR32:$Op1, ZPR32:$Op2, ZPR32:$Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2905,10 +2919,15 @@
   def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
 }
 
-multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
+multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm,
+                                                 SDPatternOperator op> {
   def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
   def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
   def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
@@ -3100,11 +3119,16 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_int_cadd<bit opc, string asm> {
+multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
   def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>;
   def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>;
   def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>;
   def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>;
+
+  def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, complexrotateop, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, complexrotateop, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, complexrotateop, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, complexrotateop, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll
@@ -0,0 +1,267 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; CADD
+;
+
+define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cadd_b:
+; CHECK: cadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                  <vscale x 16 x i8> %b,
+                                                                  i32 90)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cadd_h:
+; CHECK: cadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %b,
+                                                                  i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cadd_s:
+; CHECK: cadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %b,
+                                                                  i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cadd_d:
+; CHECK: cadd z0.d, z0.d, z1.d, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %b,
+                                                                  i32 270)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SQCADD
+;
+
+define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqcadd_b:
+; CHECK: sqcadd z0.b, z0.b, z1.b, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                    <vscale x 16 x i8> %b,
+                                                                    i32 90)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqcadd_h:
+; CHECK: sqcadd z0.h, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                    <vscale x 8 x i16> %b,
+                                                                    i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqcadd_s:
+; CHECK: sqcadd z0.s, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                    <vscale x 4 x i32> %b,
+                                                                    i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqcadd_d:
+; CHECK: sqcadd z0.d, z0.d, z1.d, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                    <vscale x 2 x i64> %b,
+                                                                    i32 270)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; CMLA
+;
+
+define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: cmla_b:
+; CHECK: cmla z0.b, z1.b, z2.b, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                  <vscale x 16 x i8> %b,
+                                                                  <vscale x 16 x i8> %c,
+                                                                  i32 90)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: cmla_h:
+; CHECK: cmla z0.h, z1.h, z2.h, #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %b,
+                                                                  <vscale x 8 x i16> %c,
+                                                                  i32 180)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: cmla_s:
+; CHECK: cmla z0.s, z1.s, z2.s, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %b,
+                                                                  <vscale x 4 x i32> %c,
+                                                                  i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: cmla_d:
+; CHECK: cmla z0.d, z1.d, z2.d, #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %b,
+                                                                  <vscale x 2 x i64> %c,
+                                                                  i32 0)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; CMLA_LANE
+;
+
+define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: cmla_lane_h:
+; CHECK: cmla z0.h, z1.h, z2.h[1], #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                       <vscale x 8 x i16> %b,
+                                                                       <vscale x 8 x i16> %c,
+                                                                       i32 1,
+                                                                       i32 180)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: cmla_lane_s:
+; CHECK: cmla z0.s, z1.s, z2.s[0], #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b,
+                                                                       <vscale x 4 x i32> %c,
+                                                                       i32 0,
+                                                                       i32 270)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; QRDCMLAH
+;
+
+define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sqrdcmlah_b:
+; CHECK: sqrdcmlah z0.b, z1.b, z2.b, #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                       <vscale x 16 x i8> %b,
+                                                                       <vscale x 16 x i8> %c,
+                                                                       i32 0)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sqrdcmlah_h:
+; CHECK: sqrdcmlah z0.h, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                       <vscale x 8 x i16> %b,
+                                                                       <vscale x 8 x i16> %c,
+                                                                       i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: sqrdcmlah_s:
+; CHECK: sqrdcmlah z0.s, z1.s, z2.s, #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b,
+                                                                       <vscale x 4 x i32> %c,
+                                                                       i32 180)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: sqrdcmlah_d:
+; CHECK: sqrdcmlah z0.d, z1.d, z2.d, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                       <vscale x 2 x i64> %b,
+                                                                       <vscale x 2 x i64> %c,
+                                                                       i32 270)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; QRDCMLAH_LANE
+;
+
+define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sqrdcmlah_lane_h:
+; CHECK: sqrdcmlah z0.h, z1.h, z2.h[1], #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                            <vscale x 8 x i16> %b,
+                                                                            <vscale x 8 x i16> %c,
+                                                                            i32 1,
+                                                                            i32 90)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrdcmlah_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: sqrdcmlah_lane_s:
+; CHECK: sqrdcmlah z0.s, z1.s, z2.s[0], #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                            <vscale x 4 x i32> %b,
+                                                                            <vscale x 4 x i32> %c,
+                                                                            i32 0,
+                                                                            i32 180)
+  ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll
@@ -0,0 +1,106 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADDLBT
+;
+
+define <vscale x 8 x i16> @saddlbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: saddlbt_b:
+; CHECK: saddlbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlbt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @saddlbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: saddlbt_h:
+; CHECK: saddlbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlbt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @saddlbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: saddlbt_s:
+; CHECK: saddlbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlbt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SSUBLBT
+;
+
+define <vscale x 8 x i16> @ssublbt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ssublbt_b:
+; CHECK: ssublbt z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublbt.nxv8i16(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ssublbt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ssublbt_h:
+; CHECK: ssublbt z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublbt.nxv4i32(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ssublbt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ssublbt_s:
+; CHECK: ssublbt z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublbt.nxv2i64(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SSUBLTB
+;
+
+define <vscale x 8 x i16> @ssubltb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ssubltb_b:
+; CHECK: ssubltb z0.h, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubltb.nxv8i16(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ssubltb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ssubltb_h:
+; CHECK: ssubltb z0.s, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubltb.nxv4i32(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ssubltb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ssubltb_s:
+; CHECK: ssubltb z0.d, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubltb.nxv2i64(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlbt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlbt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlbt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublbt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublbt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublbt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubltb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubltb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubltb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)