diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -165,8 +165,7 @@
 def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
 
 def SDT_AArch64Arith : SDTypeProfile<1, 3, [
-  SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
-  SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>
+  SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameNumEltsAs<0,1>
 ]>;
 
 def SDT_AArch64FMA : SDTypeProfile<1, 4, [
@@ -240,6 +239,12 @@
 def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>;
 def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>;
 
+// Merging fp arithmetic: matches the SVE intrinsic, or a vselect that keeps the
+// all-active result in lanes where $Pg is set and the first source elsewhere.
+def AArch64fmul_m1 : SVE_2_Op_Vselect_Passthru<int_aarch64_sve_fmul, AArch64fmul_p>;
+def AArch64fadd_m1 : SVE_2_Op_Vselect_Passthru<int_aarch64_sve_fadd, AArch64fadd_p>;
+def AArch64fsub_m1 : SVE_2_Op_Vselect_Passthru<int_aarch64_sve_fsub, AArch64fsub_p>;
+
 def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
   SDTCVecEltisVT<1,i1>
@@ -451,9 +456,9 @@
     defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
   }
 
-  defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
-  defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
-  defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>;
+  defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>;
+  defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
+  defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
   defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
   defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
   defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -8461,3 +8461,12 @@
   def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
+// Selects a merging (Pg/m) two-operand instruction from either the merging
+// intrinsic `op`, or from a vselect of the all-active node `inst`. ISD::VSELECT
+// takes (cond, true-value, false-value), so the computed value must be the
+// true operand and $Op1 the passthru for inactive lanes.
+class SVE_2_Op_Vselect_Passthru<SDPatternOperator op, SDPatternOperator inst>
+: PatFrags<(ops node:$Pg, node:$Op1, node:$Op2), [
+    (op node:$Pg, node:$Op1, node:$Op2),
+    (vselect node:$Pg, (inst (SVEAllActive), node:$Op1, node:$Op2), node:$Op1),
+  ]>;
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fp-vselect.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fp-vselect.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; select(p, op(a, b), a) keeps %a in the lanes where %p is false, which is what
+; the merging (p0/m) form of the instruction computes, so no SEL is required.
+
+define <vscale x 8 x half> @vselect_fmul_f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: vselect_fmul_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %mul = fmul <vscale x 8 x half> %a, %b
+  %x = select <vscale x 8 x i1> %p, <vscale x 8 x half> %mul, <vscale x 8 x half> %a
+  ret <vscale x 8 x half> %x
+}
+
+define <vscale x 4 x float> @vselect_fmul_f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: vselect_fmul_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %mul = fmul <vscale x 4 x float> %a, %b
+  %x = select <vscale x 4 x i1> %p, <vscale x 4 x float> %mul, <vscale x 4 x float> %a
+  ret <vscale x 4 x float> %x
+}
+
+define <vscale x 2 x double> @vselect_fmul_f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: vselect_fmul_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %mul = fmul <vscale x 2 x double> %a, %b
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %mul, <vscale x 2 x double> %a
+  ret <vscale x 2 x double> %sel
+}
+
+define <vscale x 8 x half> @vselect_fadd_f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: vselect_fadd_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %add = fadd <vscale x 8 x half> %a, %b
+  %x = select <vscale x 8 x i1> %p, <vscale x 8 x half> %add, <vscale x 8 x half> %a
+  ret <vscale x 8 x half> %x
+}
+
+define <vscale x 4 x float> @vselect_fadd_f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: vselect_fadd_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %add = fadd <vscale x 4 x float> %a, %b
+  %x = select <vscale x 4 x i1> %p, <vscale x 4 x float> %add, <vscale x 4 x float> %a
+  ret <vscale x 4 x float> %x
+}
+
+define <vscale x 2 x double> @vselect_fadd_f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: vselect_fadd_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %add = fadd <vscale x 2 x double> %a, %b
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %add, <vscale x 2 x double> %a
+  ret <vscale x 2 x double> %sel
+}
+
+define <vscale x 8 x half> @vselect_fsub_f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: vselect_fsub_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %sub = fsub <vscale x 8 x half> %a, %b
+  %x = select <vscale x 8 x i1> %p, <vscale x 8 x half> %sub, <vscale x 8 x half> %a
+  ret <vscale x 8 x half> %x
+}
+
+define <vscale x 4 x float> @vselect_fsub_f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: vselect_fsub_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %sub = fsub <vscale x 4 x float> %a, %b
+  %x = select <vscale x 4 x i1> %p, <vscale x 4 x float> %sub, <vscale x 4 x float> %a
+  ret <vscale x 4 x float> %x
+}
+
+define <vscale x 2 x double> @vselect_fsub_f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: vselect_fsub_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %sub = fsub <vscale x 2 x double> %a, %b
+  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %sub, <vscale x 2 x double> %a
+  ret <vscale x 2 x double> %sel
+}