diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -77,6 +77,8 @@ SBC, // adc, sbc instructions // Predicated instructions where inactive lanes produce undefined results. + ABDS_PRED, + ABDU_PRED, ADD_PRED, FADD_PRED, FDIV_PRED, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1206,6 +1206,8 @@ setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::ABS, VT, Custom); + setOperationAction(ISD::ABDS, VT, Custom); + setOperationAction(ISD::ABDU, VT, Custom); setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); @@ -1994,6 +1996,8 @@ MAKE_CASE(AArch64ISD::CSINC) MAKE_CASE(AArch64ISD::THREAD_POINTER) MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) + MAKE_CASE(AArch64ISD::ABDS_PRED) + MAKE_CASE(AArch64ISD::ABDU_PRED) MAKE_CASE(AArch64ISD::ADD_PRED) MAKE_CASE(AArch64ISD::MUL_PRED) MAKE_CASE(AArch64ISD::MULHS_PRED) @@ -5196,6 +5200,10 @@ return LowerFixedLengthVectorSelectToSVE(Op, DAG); case ISD::ABS: return LowerABS(Op, DAG); + case ISD::ABDS: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED); + case ISD::ABDU: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED); case ISD::BITREVERSE: return LowerBitreverse(Op, DAG); case ISD::BSWAP: diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -189,11 +189,13 @@ def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; def AArch64lsr_p : 
SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>; +def AArch64sabd_p : SDNode<"AArch64ISD::ABDS_PRED", SDT_AArch64Arith>; def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>; def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>; +def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>; def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; @@ -418,6 +420,8 @@ defm UMAX_ZPZZ : sve_int_bin_pred_bhsd; defm SMIN_ZPZZ : sve_int_bin_pred_bhsd; defm UMIN_ZPZZ : sve_int_bin_pred_bhsd; + defm SABD_ZPZZ : sve_int_bin_pred_bhsd; + defm UABD_ZPZZ : sve_int_bin_pred_bhsd; defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", AArch64frecpe>; defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>; diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-abd.ll @@ -0,0 +1,267 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; SABD +; + +define @sabd_b( %a, %b) #0 { +; CHECK-LABEL: sabd_b: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv16i16( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @sabd_b_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: sabd_b_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: mov 
z0.b, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ptrue p2.b +; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: abs z0.b, p2/m, z0.b +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv16i8( %sub, i1 true) + ret %abs +} + +define @sabd_h( %a, %b) #0 { +; CHECK-LABEL: sabd_h: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv8i32( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @sabd_h_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: sabd_h_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: sxtb z1.h, p0/m, z1.h +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: abs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv8i16( %sub, i1 true) + ret %abs +} + +define @sabd_s( %a, %b) #0 { +; CHECK-LABEL: sabd_s: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv4i64( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @sabd_s_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: sabd_s_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: sxth z1.s, p0/m, z1.s +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv4i32( %sub, i1 true) + ret %abs +} + +define @sabd_d( %a, %b) #0 { +; CHECK-LABEL: sabd_d: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sabd z0.d, 
p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv2i128( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @sabd_d_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: sabd_d_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: sxtw z1.d, p0/m, z1.d +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %a.sext = sext %a to + %b.sext = sext %b to + %sub = sub %a.sext, %b.sext + %abs = call @llvm.abs.nxv2i64( %sub, i1 true) + ret %abs +} + +; +; UABD +; + +define @uabd_b( %a, %b) #0 { +; CHECK-LABEL: uabd_b: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv16i16( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @uabd_b_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: uabd_b_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ptrue p2.b +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: abs z0.b, p2/m, z0.b +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv16i8( %sub, i1 true) + ret %abs +} + +define @uabd_h( %a, %b) #0 { +; CHECK-LABEL: uabd_h: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv8i32( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @uabd_h_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: uabd_h_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sub z0.h, z0.h, 
z1.h +; CHECK-NEXT: abs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv8i16( %sub, i1 true) + ret %abs +} + +define @uabd_s( %a, %b) #0 { +; CHECK-LABEL: uabd_s: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i64( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @uabd_s_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: uabd_s_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i32( %sub, i1 true) + ret %abs +} + +define @uabd_d( %a, %b) #0 { +; CHECK-LABEL: uabd_d: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv2i128( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +define @uabd_d_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: uabd_d_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: and z1.d, z1.d, #0xffffffff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv2i64( %sub, i1 true) + ret %abs +} + +declare @llvm.abs.nxv16i8(, i1) + +declare @llvm.abs.nxv8i16(, i1) +declare @llvm.abs.nxv16i16(, i1) + +declare @llvm.abs.nxv4i32(, i1) +declare @llvm.abs.nxv8i32(, i1) + +declare @llvm.abs.nxv2i64(, i1) +declare @llvm.abs.nxv4i64(, i1) + +declare 
<vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1) + +attributes #0 = { "target-features"="+neon,+sve" }