diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -244,6 +244,14 @@
 def AArch64fadd_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_fadd, AArch64fadd_p>;
 def AArch64fsub_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_fsub, AArch64fsub_p>;
 
+def AArch64saba : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+                           [(int_aarch64_sve_saba node:$op1, node:$op2, node:$op3),
+                            (add node:$op1, (AArch64sabd_p (SVEAllActive), node:$op2, node:$op3))]>;
+
+def AArch64uaba : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+                           [(int_aarch64_sve_uaba node:$op1, node:$op2, node:$op3),
+                            (add node:$op1, (AArch64uabd_p (SVEAllActive), node:$op2, node:$op3))]>;
+
 def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
   SDTCVecEltisVT<1,i1>
@@ -2970,8 +2978,8 @@
   defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>;
 
   // SVE2 integer absolute difference and accumulate
-  defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>;
-  defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>;
+  defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", AArch64saba>;
+  defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", AArch64uaba>;
 
   // SVE2 integer absolute difference and accumulate long
   defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb", int_aarch64_sve_sabalb>;
diff --git a/llvm/test/CodeGen/AArch64/sve-aba.ll b/llvm/test/CodeGen/AArch64/sve-aba.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-aba.ll
@@ -0,0 +1,277 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; SABA
+;
+
+define <vscale x 16 x i8> @saba_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: saba_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
+  %c.sext = sext <vscale x 16 x i8> %c to <vscale x 16 x i16>
+  %sub = sub <vscale x 16 x i16> %b.sext, %c.sext
+  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
+  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
+  %add = add <vscale x 16 x i8> %a, %trunc
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 16 x i8> @saba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) #0 {
+; CHECK-LABEL: saba_b_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z2.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
+  %c.sext = sext <vscale x 16 x i1> %c to <vscale x 16 x i8>
+  %sub = sub <vscale x 16 x i8> %b.sext, %c.sext
+  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
+  %add = add <vscale x 16 x i8> %a, %abs
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @saba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: saba_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
+  %c.sext = sext <vscale x 8 x i16> %c to <vscale x 8 x i32>
+  %sub = sub <vscale x 8 x i32> %b.sext, %c.sext
+  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
+  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
+  %add = add <vscale x 8 x i16> %a, %trunc
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 8 x i16> @saba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) #0 {
+; CHECK-LABEL: saba_h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    sxtb z2.h, p0/m, z2.h
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
+  %c.sext = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
+  %sub = sub <vscale x 8 x i16> %b.sext, %c.sext
+  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
+  %add = add <vscale x 8 x i16> %a, %abs
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @saba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: saba_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
+  %c.sext = sext <vscale x 4 x i32> %c to <vscale x 4 x i64>
+  %sub = sub <vscale x 4 x i64> %b.sext, %c.sext
+  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
+  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
+  %add = add <vscale x 4 x i32> %a, %trunc
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x i32> @saba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) #0 {
+; CHECK-LABEL: saba_s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    sxth z2.s, p0/m, z2.s
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+  %c.sext = sext <vscale x 4 x i16> %c to <vscale x 4 x i32>
+  %sub = sub <vscale x 4 x i32> %b.sext, %c.sext
+  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+  %add = add <vscale x 4 x i32> %a, %abs
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @saba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: saba_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
+  %c.sext = sext <vscale x 2 x i64> %c to <vscale x 2 x i128>
+  %sub = sub <vscale x 2 x i128> %b.sext, %c.sext
+  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
+  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
+  %add = add <vscale x 2 x i64> %a, %trunc
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @saba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) #0 {
+; CHECK-LABEL: saba_d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT:    sxtw z2.d, p0/m, z2.d
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
+  %c.sext = sext <vscale x 2 x i32> %c to <vscale x 2 x i64>
+  %sub = sub <vscale x 2 x i64> %b.sext, %c.sext
+  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
+  %add = add <vscale x 2 x i64> %a, %abs
+  ret <vscale x 2 x i64> %add
+}
+
+;
+; UABA
+;
+
+define <vscale x 16 x i8> @uaba_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: uaba_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
+  %c.zext = zext <vscale x 16 x i8> %c to <vscale x 16 x i16>
+  %sub = sub <vscale x 16 x i16> %b.zext, %c.zext
+  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
+  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
+  %add = add <vscale x 16 x i8> %a, %trunc
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 16 x i8> @uaba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) #0 {
+; CHECK-LABEL: uaba_b_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov z2.b, p1/z, #1 // =0x1
+; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
+  %c.zext = zext <vscale x 16 x i1> %c to <vscale x 16 x i8>
+  %sub = sub <vscale x 16 x i8> %b.zext, %c.zext
+  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
+  %add = add <vscale x 16 x i8> %a, %abs
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @uaba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: uaba_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
+  %c.zext = zext <vscale x 8 x i16> %c to <vscale x 8 x i32>
+  %sub = sub <vscale x 8 x i32> %b.zext, %c.zext
+  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
+  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
+  %add = add <vscale x 8 x i16> %a, %trunc
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 8 x i16> @uaba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) #0 {
+; CHECK-LABEL: uaba_h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    and z2.h, z2.h, #0xff
+; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
+  %c.zext = zext <vscale x 8 x i8> %c to <vscale x 8 x i16>
+  %sub = sub <vscale x 8 x i16> %b.zext, %c.zext
+  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
+  %add = add <vscale x 8 x i16> %a, %abs
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @uaba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: uaba_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
+  %c.zext = zext <vscale x 4 x i32> %c to <vscale x 4 x i64>
+  %sub = sub <vscale x 4 x i64> %b.zext, %c.zext
+  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
+  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
+  %add = add <vscale x 4 x i32> %a, %trunc
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x i32> @uaba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) #0 {
+; CHECK-LABEL: uaba_s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z1.s, z1.s, #0xffff
+; CHECK-NEXT:    and z2.s, z2.s, #0xffff
+; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+  %c.zext = zext <vscale x 4 x i16> %c to <vscale x 4 x i32>
+  %sub = sub <vscale x 4 x i32> %b.zext, %c.zext
+  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+  %add = add <vscale x 4 x i32> %a, %abs
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @uaba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: uaba_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
+  %c.zext = zext <vscale x 2 x i64> %c to <vscale x 2 x i128>
+  %sub = sub <vscale x 2 x i128> %b.zext, %c.zext
+  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
+  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
+  %add = add <vscale x 2 x i64> %a, %trunc
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @uaba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) #0 {
+; CHECK-LABEL: uaba_d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
+  %c.zext = zext <vscale x 2 x i32> %c to <vscale x 2 x i64>
+  %sub = sub <vscale x 2 x i64> %b.zext, %c.zext
+  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
+  %add = add <vscale x 2 x i64> %a, %abs
+  ret <vscale x 2 x i64> %add
+}
+
+; A variant of uaba_s but with the add operands switched.
+define <vscale x 4 x i32> @uaba_s_commutative(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: uaba_s_commutative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
+  %c.zext = zext <vscale x 4 x i32> %c to <vscale x 4 x i64>
+  %sub = sub <vscale x 4 x i64> %b.zext, %c.zext
+  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
+  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
+  %add = add <vscale x 4 x i32> %trunc, %a
+  ret <vscale x 4 x i32> %add
+}
+
+declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
+
+declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
+declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)
+
+declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
+
+declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1)
+
+attributes #0 = { "target-features"="+neon,+sve,+sve2" }