diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -243,6 +243,18 @@ return false; } + bool SelectDupNegativeZero(SDValue N) { + switch(N->getOpcode()) { + case AArch64ISD::DUP: + case ISD::SPLAT_VECTOR: { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); + return Const && Const->isZero() && Const->isNegative(); + } + } + + return false; + } + template<MVT::SimpleValueType VT> bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { return SelectSVEAddSubImm(N, VT, Imm, Shift); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -208,6 +208,10 @@ (AArch64fadd_p node:$op1, node:$op2, node:$op3), [{ return N->getFlags().hasNoSignedZeros(); }]>; +def AArch64fsub_p_nsz : PatFrag<(ops node:$op1, node:$op2, node:$op3), + (AArch64fsub_p node:$op1, node:$op2, node:$op3), [{ + return N->getFlags().hasNoSignedZeros(); +}]>; def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>, @@ -416,10 +420,11 @@ [(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3), (xor node:$op1, (xor node:$op2, node:$op3))]>; -class fma_patfrags<SDPatternOperator intrinsic, SDPatternOperator sdnode> +class fma_patfrags<SDPatternOperator intrinsic, SDPatternOperator add_zero, SDPatternOperator add_negzero> : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), [(intrinsic node:$pred, node:$op1, node:$op2, node:$op3), - (sdnode (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))], + (add_zero (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0))), + (add_negzero (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDupNeg0)))], [{ if ((N->getOpcode() != 
AArch64ISD::FADD_PRED) && (N->getOpcode() != AArch64ISD::FSUB_PRED)) @@ -427,8 +432,8 @@ return N->getFlags().hasAllowContract(); }]>; -def AArch64fmla_m1 : fma_patfrags<int_aarch64_sve_fmla, AArch64fadd_p_nsz>; -def AArch64fmls_m1 : fma_patfrags<int_aarch64_sve_fmls, AArch64fsub_p>; +def AArch64fmla_m1 : fma_patfrags<int_aarch64_sve_fmla, AArch64fadd_p_nsz, AArch64fadd_p>; +def AArch64fmls_m1 : fma_patfrags<int_aarch64_sve_fmls, AArch64fsub_p, AArch64fsub_p_nsz>; def AArch64smax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_smax, AArch64smax_p>; def AArch64umax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_umax, AArch64umax_p>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -435,6 +435,7 @@ } def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>; +def SVEDupNeg0 : ComplexPattern<vAny, 0, "SelectDupNegativeZero", []>; class SVE_1_Op_PassthruZero_Pat diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll --- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll @@ -1116,11 +1116,7 @@ define <vscale x 8 x half> @fadd_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_h_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: fmul z1.h, z1.h, z2.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fadd z0.h, z0.h, z1.h +; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %fmul = fmul <vscale x 8 x half> %b, %c %nz = fneg <vscale x 8 x half> zeroinitializer @@ -1132,11 +1128,7 @@ define <vscale x 4 x float> @fadd_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_s_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: fmul z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: ret %fmul = fmul <vscale x 4 x float> %b, %c %nz = fneg <vscale x 4 x float> zeroinitializer @@ -1148,11 +1140,7 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_d_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: fmul 
z1.d, z1.d, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fadd z0.d, z0.d, z1.d +; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: ret %fmul = fmul <vscale x 2 x double> %b, %c %nz = fneg <vscale x 2 x double> zeroinitializer @@ -1214,11 +1202,7 @@ define <vscale x 8 x half> @fadd_sel_fmul_h_negzero_nsz(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_h_negzero_nsz: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: fmul z1.h, z1.h, z2.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fadd z0.h, z0.h, z1.h +; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %fmul = fmul <vscale x 8 x half> %b, %c %nz = fneg <vscale x 8 x half> zeroinitializer @@ -1230,11 +1214,7 @@ define <vscale x 4 x float> @fadd_sel_fmul_s_negzero_nsz(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_s_negzero_nsz: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: fmul z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: ret %fmul = fmul <vscale x 4 x float> %b, %c %nz = fneg <vscale x 4 x float> zeroinitializer @@ -1246,11 +1226,7 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero_nsz(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_d_negzero_nsz: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: fmul z1.d, z1.d, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fadd z0.d, z0.d, z1.d +; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: ret %fmul = fmul <vscale x 2 x double> %b, %c %nz = fneg <vscale x 2 x double> zeroinitializer @@ -1262,11 +1238,7 @@ define <vscale x 8 x half> @fsub_sel_fmul_h_negzero_nsz(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: fsub_sel_fmul_h_negzero_nsz: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: fmul z1.h, z1.h, z2.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fsub z0.h, z0.h, z1.h +; CHECK-NEXT: fmls z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %fmul = fmul <vscale x 8 x half> %b, %c %nz = fneg <vscale x 8 x half>
zeroinitializer @@ -1278,11 +1250,7 @@ define <vscale x 4 x float> @fsub_sel_fmul_s_negzero_nsz(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: fsub_sel_fmul_s_negzero_nsz: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: fmul z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fsub z0.s, z0.s, z1.s +; CHECK-NEXT: fmls z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: ret %fmul = fmul <vscale x 4 x float> %b, %c %nz = fneg <vscale x 4 x float> zeroinitializer @@ -1294,11 +1262,7 @@ define <vscale x 2 x double> @fsub_sel_fmul_d_negzero_nsz(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: fsub_sel_fmul_d_negzero_nsz: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: fmul z1.d, z1.d, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fsub z0.d, z0.d, z1.d +; CHECK-NEXT: fmls z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: ret %fmul = fmul <vscale x 2 x double> %b, %c %nz = fneg <vscale x 2 x double> zeroinitializer