diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -516,6 +516,7 @@ bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; bool isOneUseSetCC(SDValue N) const; + bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -12110,6 +12111,22 @@ return SDValue(); } +/// Return true if X and Y are each at least as cheap in negated form (nonzero +/// isNegatibleForFree result) and at least one is strictly cheaper (result 2). +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + const TargetOptions &Options = DAG.getTarget().Options; + if (char LHSNeg = isNegatibleForFree(X, LegalOperations, TLI, &Options, + ForCodeSize)) + if (char RHSNeg = isNegatibleForFree(Y, LegalOperations, TLI, &Options, + ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // A result of 2 marks an operand that is strictly cheaper when negated. + if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12180,21 +12197,11 @@ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); - // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. 
- if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) @@ -12273,6 +12280,13 @@ return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -1205,7 +1205,7 @@ ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] -; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]] +; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]] ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]] diff --git a/llvm/test/CodeGen/PowerPC/fneg.ll b/llvm/test/CodeGen/PowerPC/fneg.ll --- a/llvm/test/CodeGen/PowerPC/fneg.ll +++ b/llvm/test/CodeGen/PowerPC/fneg.ll @@ -20,8 +20,7 @@ define float @fma_fneg_fneg(float %x, float %y, float %z) { ; CHECK-LABEL: fma_fneg_fneg: ; CHECK: # %bb.0: -; CHECK-NEXT: fneg f0, f2 -; CHECK-NEXT: fnmsubs f1, f1, f0, f3 +; CHECK-NEXT: fmadds f1, f1, f2, f3 ; CHECK-NEXT: blr 
%negx = fneg float %x %negy = fneg float %y @@ -32,8 +31,8 @@ define float @fma_fneg_fsub(float %x, float %y0, float %y1, float %z) { ; CHECK-LABEL: fma_fneg_fsub: ; CHECK: # %bb.0: -; CHECK-NEXT: fsubs f0, f2, f3 -; CHECK-NEXT: fnmsubs f1, f1, f0, f4 +; CHECK-NEXT: fsubs f0, f3, f2 +; CHECK-NEXT: fmadds f1, f1, f0, f4 ; CHECK-NEXT: blr %negx = fneg float %x %negy = fsub nsz float %y0, %y1