diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3538,18 +3538,35 @@ llvm_unreachable("Not Implemented"); } - /// Returns whether computing the negated form of the specified expression is - /// more expensive, the same cost or cheaper. - virtual NegatibleCost getNegatibleCost(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth = 0) const; - - /// If getNegatibleCost returns Neutral/Cheaper, return the newly negated - /// expression. + /// Return the newly negated expression if the cost is not expensive and + /// set the cost in \p Cost to indicate that if it is cheaper or neutral to + /// do the negation. virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, + bool LegalOps, bool OptForSize, + NegatibleCost &Cost, unsigned Depth = 0) const; + /// This is the helper function to return the newly negated expression only + /// when the cost is cheaper. + SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOps, bool OptForSize, + unsigned Depth = 0) const { + NegatibleCost Cost = NegatibleCost::Expensive; + SDValue V = + getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); + if (V && Cost == NegatibleCost::Cheaper) + return V; + return SDValue(); + } + + /// This is the helper function to return the newly negated expression if + /// the cost is not expensive. + SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, + bool OptForSize, unsigned Depth = 0) const { + NegatibleCost Cost = NegatibleCost::Expensive; + return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); + } + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -527,7 +527,6 @@ bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC, bool MatchStrict = false) const; bool isOneUseSetCC(SDValue N) const; - bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -12392,20 +12391,16 @@ return NewSel; // fold (fadd A, (fneg B)) -> (fsub A, B) - if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) == - TargetLowering::NegatibleCost::Cheaper) - return DAG.getNode( - ISD::FSUB, DL, VT, N0, - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) + if (SDValue NegN1 = TLI.getCheaperNegatedExpression( + N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags); // fold (fadd (fneg A), B) -> (fsub B, A) - if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.getNegatibleCost(N0, DAG, LegalOperations, ForCodeSize) == - TargetLowering::NegatibleCost::Cheaper) - return DAG.getNode( - ISD::FSUB, DL, VT, N1, - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) + if (SDValue NegN0 = TLI.getCheaperNegatedExpression( + N0, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12587,9 +12582,9 @@ if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) != - TargetLowering::NegatibleCost::Expensive) - return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return NegN1; if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } @@ -12607,11 +12602,9 @@ } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) != - TargetLowering::NegatibleCost::Expensive) - return DAG.getNode( - ISD::FADD, DL, VT, N0, - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12622,25 +12615,6 @@ return SDValue(); } -/// Return true if both inputs are at least as cheap in negated form and at -/// least one input is strictly cheaper in negated form. -bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { - TargetLowering::NegatibleCost LHSNeg = - TLI.getNegatibleCost(X, DAG, LegalOperations, ForCodeSize); - if (TargetLowering::NegatibleCost::Expensive == LHSNeg) - return false; - - TargetLowering::NegatibleCost RHSNeg = - TLI.getNegatibleCost(Y, DAG, LegalOperations, ForCodeSize); - if (TargetLowering::NegatibleCost::Expensive == RHSNeg) - return false; - - // Both negated operands are at least as cheap as their counterparts. - // Check to see if at least one is cheaper negated. - return (TargetLowering::NegatibleCost::Cheaper == LHSNeg || - TargetLowering::NegatibleCost::Cheaper == RHSNeg); -} - SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12715,11 +12689,17 @@ return DAG.getNode(ISD::FNEG, DL, VT, N0); // -N0 * -N1 --> N0 * N1 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) { return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } @@ -12800,13 +12780,18 @@ } // (-N0 * -N1) + N2 --> (N0 * N1) + N2 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); - } if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -12892,13 +12877,10 @@ // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) - if (!TLI.isFNegFree(VT) && - TLI.getNegatibleCost(SDValue(N, 0), DAG, LegalOperations, ForCodeSize) == - TargetLowering::NegatibleCost::Cheaper) - return DAG.getNode(ISD::FNEG, DL, VT, - TLI.getNegatedExpression(SDValue(N, 0), DAG, - LegalOperations, ForCodeSize), - Flags); + if (!TLI.isFNegFree(VT)) + if (SDValue Neg = TLI.getCheaperNegatedExpression( + SDValue(N, 0), DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags); return SDValue(); } @@ -13074,13 +13056,18 @@ } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue Neg0 = - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue Neg1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue Neg0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue Neg1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (Neg0 && Neg1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, Neg0, Neg1, Flags); - } return SDValue(); } @@ -13626,14 +13613,14 @@ if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (TLI.getNegatibleCost(N0, DAG, LegalOperations, ForCodeSize) != - TargetLowering::NegatibleCost::Expensive) - return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize)) + return NegN0; // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't - // know it was called from a context with a nsz flag if the input fsub does - // not. + // FIXME: This is duplicated in getNegatedExpression, but getNegatedExpression + // doesn't know it was called from a context with a nsz flag if the input fsub + // does not. if (N0.getOpcode() == ISD::FSUB && (DAG.getTarget().Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5551,166 +5551,82 @@ return false; } -TargetLowering::NegatibleCost -TargetLowering::getNegatibleCost(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth) const { +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOps, bool OptForSize, + NegatibleCost &Cost, + unsigned Depth) const { // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) - return NegatibleCost::Cheaper; + if (Op.getOpcode() == ISD::FNEG) { + Cost = NegatibleCost::Cheaper; + return Op.getOperand(0); + } - // Don't allow anything with multiple uses unless we know it is free. - EVT VT = Op.getValueType(); + // It is expensive by default. + Cost = NegatibleCost::Expensive; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Pre-increment recursion depth for use in recursive calls. + ++Depth; const SDNodeFlags Flags = Op->getFlags(); const TargetOptions &Options = DAG.getTarget().Options; - if (!Op.hasOneUse()) { - bool IsFreeExtend = Op.getOpcode() == ISD::FP_EXTEND && - isFPExtFree(VT, Op.getOperand(0).getValueType()); - - // If we already have the use of the negated floating constant, it is free - // to negate it even it has multiple uses. - bool IsFreeConstant = - Op.getOpcode() == ISD::ConstantFP && - !getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize) - .use_empty(); + EVT VT = Op.getValueType(); + unsigned Opcode = Op.getOpcode(); - if (!IsFreeExtend && !IsFreeConstant) - return NegatibleCost::Expensive; + // Don't allow anything with multiple uses unless we know it is free. + if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) { + bool IsFreeExtend = Opcode == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()); + if (!IsFreeExtend) + return SDValue(); } - // Don't recurse exponentially. - if (Depth > SelectionDAG::MaxRecursionDepth) - return NegatibleCost::Expensive; + SDLoc DL(Op); - switch (Op.getOpcode()) { + switch (Opcode) { case ISD::ConstantFP: { - if (!LegalOperations) - return NegatibleCost::Neutral; - // Don't invert constant FP values after legalization unless the target says // the negated constant is legal. - if (isOperationLegal(ISD::ConstantFP, VT) || + bool IsOpLegal = + isOperationLegal(ISD::ConstantFP, VT) || isFPImmLegal(neg(cast(Op)->getValueAPF()), VT, - ForCodeSize)) - return NegatibleCost::Neutral; - break; + OptForSize); + + if (LegalOps && !IsOpLegal) + break; + + APFloat V = cast(Op)->getValueAPF(); + V.changeSign(); + SDValue CFP = DAG.getConstantFP(V, DL, VT); + + // If we already have the use of the negated floating constant, it is free + // to negate it even it has multiple uses. + if (!Op.hasOneUse() && CFP.use_empty()) + break; + Cost = NegatibleCost::Neutral; + return CFP; } case ISD::BUILD_VECTOR: { // Only permit BUILD_VECTOR of constants. if (llvm::any_of(Op->op_values(), [&](SDValue N) { return !N.isUndef() && !isa(N); })) - return NegatibleCost::Expensive; - if (!LegalOperations) - return NegatibleCost::Neutral; - if (isOperationLegal(ISD::ConstantFP, VT) && - isOperationLegal(ISD::BUILD_VECTOR, VT)) - return NegatibleCost::Neutral; - if (llvm::all_of(Op->op_values(), [&](SDValue N) { + break; + + bool IsOpLegal = + (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) || + llvm::all_of(Op->op_values(), [&](SDValue N) { return N.isUndef() || isFPImmLegal(neg(cast(N)->getValueAPF()), VT, - ForCodeSize); - })) - return NegatibleCost::Neutral; - break; - } - case ISD::FADD: { - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return NegatibleCost::Expensive; - - // After operation legalization, it might not be legal to create new FSUBs. - if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) - return NegatibleCost::Expensive; - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - NegatibleCost V0 = getNegatibleCost(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (V0 != NegatibleCost::Expensive) - return V0; - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return getNegatibleCost(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, - Depth + 1); - } - case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return NegatibleCost::Expensive; - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return NegatibleCost::Neutral; - case ISD::FMUL: - case ISD::FDIV: { - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - NegatibleCost V0 = getNegatibleCost(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (V0 != NegatibleCost::Expensive) - return V0; - - // Ignore X * 2.0 because that is expected to be canonicalized to X + X. - if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) - if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) - return NegatibleCost::Expensive; - - return getNegatibleCost(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, - Depth + 1); - } - case ISD::FMA: - case ISD::FMAD: { - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return NegatibleCost::Expensive; - - // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) - // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) - NegatibleCost V2 = getNegatibleCost(Op.getOperand(2), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (NegatibleCost::Expensive == V2) - return NegatibleCost::Expensive; - - // One of Op0/Op1 must be cheaply negatible, then select the cheapest. - NegatibleCost V0 = getNegatibleCost(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - NegatibleCost V1 = getNegatibleCost(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - NegatibleCost V01 = std::min(V0, V1); - if (V01 == NegatibleCost::Expensive) - return NegatibleCost::Expensive; - return std::min(V01, V2); - } + OptForSize); + }); - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FSIN: - return getNegatibleCost(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1); - } - - return NegatibleCost::Expensive; -} - -SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOps, bool OptForSize, - unsigned Depth) const { - // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) - return Op.getOperand(0); - - assert(Depth <= SelectionDAG::MaxRecursionDepth && - "getNegatedExpression doesn't match getNegatibleCost"); - - // Pre-increment recursion depth for use in recursive calls. - ++Depth; - const SDNodeFlags Flags = Op->getFlags(); - EVT VT = Op.getValueType(); - unsigned Opcode = Op.getOpcode(); - SDLoc DL(Op); + if (LegalOps && !IsOpLegal) + break; - switch (Opcode) { - case ISD::ConstantFP: { - APFloat V = cast(Op)->getValueAPF(); - V.changeSign(); - return DAG.getConstantFP(V, DL, VT); - } - case ISD::BUILD_VECTOR: { SmallVector Ops; for (SDValue C : Op->op_values()) { if (C.isUndef()) { @@ -5721,85 +5637,150 @@ V.changeSign(); Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType())); } + Cost = NegatibleCost::Neutral; return DAG.getBuildVector(VT, DL, Ops); } case ISD::FADD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + break; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT)) + break; SDValue X = Op.getOperand(0), Y = Op.getOperand(1); - assert((DAG.getTarget().Options.NoSignedZerosFPMath || - Flags.hasNoSignedZeros()) && - "Expected NSZ fp-flag"); // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y) - NegatibleCost CostX = getNegatibleCost(X, DAG, LegalOps, OptForSize, Depth); - if (CostX != NegatibleCost::Expensive) - return DAG.getNode( - ISD::FSUB, DL, VT, - getNegatedExpression(X, DAG, LegalOps, OptForSize, Depth), Y, Flags); - + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) - return DAG.getNode( - ISD::FSUB, DL, VT, - getNegatedExpression(Y, DAG, LegalOps, OptForSize, Depth), X, Flags); + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = CostX; + assert(Cost != NegatibleCost::Expensive && + "Can't negate expr if the cost is expensive"); + return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); + } + + // Negate the Y if it is not expensive. + if (NegY) { + Cost = CostY; + assert(Cost != NegatibleCost::Expensive && + "Can't negate expr if the cost is expensive"); + return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); + } + break; } case ISD::FSUB: { + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + break; + SDValue X = Op.getOperand(0), Y = Op.getOperand(1); // fold (fneg (fsub 0, Y)) -> Y if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true)) - if (C->isZero()) + if (C->isZero()) { + Cost = NegatibleCost::Cheaper; return Y; + } // fold (fneg (fsub X, Y)) -> (fsub Y, X) + Cost = NegatibleCost::Neutral; return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags); } case ISD::FMUL: case ISD::FDIV: { SDValue X = Op.getOperand(0), Y = Op.getOperand(1); - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - NegatibleCost CostX = getNegatibleCost(X, DAG, LegalOps, OptForSize, Depth); - if (CostX != NegatibleCost::Expensive) - return DAG.getNode( - Opcode, DL, VT, - getNegatedExpression(X, DAG, LegalOps, OptForSize, Depth), Y, Flags); + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode( - Opcode, DL, VT, X, - getNegatedExpression(Y, DAG, LegalOps, OptForSize, Depth), Flags); + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = CostX; + assert(Cost != NegatibleCost::Expensive && + "Can't negate expr if the cost is expensive"); + return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); + } + + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. + if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + break; + + // Negate the Y if it is not expensive. + if (NegY) { + Cost = CostY; + assert(Cost != NegatibleCost::Expensive && + "Can't negate expr if the cost is expensive"); + return DAG.getNode(Opcode, DL, VT, X, NegY, Flags); + } + break; } case ISD::FMA: case ISD::FMAD: { - assert((DAG.getTarget().Options.NoSignedZerosFPMath || - Flags.hasNoSignedZeros()) && - "Expected NSZ fp-flag"); + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + break; SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2); - SDValue NegZ = getNegatedExpression(Z, DAG, LegalOps, OptForSize, Depth); - NegatibleCost CostX = getNegatibleCost(X, DAG, LegalOps, OptForSize, Depth); - NegatibleCost CostY = getNegatibleCost(Y, DAG, LegalOps, OptForSize, Depth); - if (CostX <= CostY) { - // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) - SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, Depth); + NegatibleCost CostZ = NegatibleCost::Expensive; + SDValue NegZ = + getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth); + // Give up if fail to negate the Z. + if (!NegZ) + break; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = std::min(CostX, std::min(CostY, CostZ)); + assert(Cost != NegatibleCost::Expensive && + "Can't negate expr if the cost is expensive"); return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); } - // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) - SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, Depth); - return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); + // Negate the Y if it is not expensive. + if (NegY) { + Cost = std::min(CostY, CostZ); + assert(Cost != NegatibleCost::Expensive && + "Can't negate expr if the cost is expensive"); + return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); + } + break; } case ISD::FP_EXTEND: case ISD::FSIN: - return DAG.getNode(Opcode, DL, VT, - getNegatedExpression(Op.getOperand(0), DAG, LegalOps, - OptForSize, Depth)); + if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps, + OptForSize, Cost, Depth)) + return DAG.getNode(Opcode, DL, VT, NegV); + break; case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, DL, VT, - getNegatedExpression(Op.getOperand(0), DAG, LegalOps, - OptForSize, Depth), - Op.getOperand(1)); + if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps, + OptForSize, Cost, Depth)) + return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1)); + break; } - llvm_unreachable("Unknown code"); + return SDValue(); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -170,9 +170,10 @@ bool isZExtFree(EVT Src, EVT Dest) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; - NegatibleCost getNegatibleCost(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth) const override; + SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + NegatibleCost &Cost, + unsigned Depth) const override; bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -756,24 +756,25 @@ } } -TargetLowering::NegatibleCost -AMDGPUTargetLowering::getNegatibleCost(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth) const { +SDValue AMDGPUTargetLowering::getNegatedExpression( + SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, + NegatibleCost &Cost, unsigned Depth) const { switch (Op.getOpcode()) { case ISD::FMA: case ISD::FMAD: { // Negating a fma is not free if it has users without source mods. - if (!allUsesHaveSourceMods(Op.getNode())) - return NegatibleCost::Expensive; + if (!allUsesHaveSourceMods(Op.getNode())) { + Cost = NegatibleCost::Expensive; + return SDValue(); + } break; } default: break; } - return TargetLowering::getNegatibleCost(Op, DAG, LegalOperations, ForCodeSize, - Depth); + return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, + ForCodeSize, Cost, Depth); } //===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -813,16 +813,11 @@ /// and some i16 instructions are slow. bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; - /// Returns whether computing the negated form of the specified expression - /// is more expensive, the same cost or cheaper. - NegatibleCost getNegatibleCost(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth) const override; - - /// If getNegatibleCost returns Neutral/Cheaper, return the newly negated - /// expression. + /// Return the newly negated expression if the cost is the same or cheaper + /// and set the cost in \p Cost. SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, + NegatibleCost &Cost, unsigned Depth) const override; MachineBasicBlock * diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44085,68 +44085,23 @@ bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); bool LegalOperations = !DCI.isBeforeLegalizeOps(); - if (TLI.getNegatibleCost(Arg, DAG, LegalOperations, CodeSize) != - TargetLowering::NegatibleCost::Expensive) - return DAG.getBitcast( - OrigVT, TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize)); + if (SDValue NegArg = + TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize)) + return DAG.getBitcast(OrigVT, NegArg); return SDValue(); } -TargetLowering::NegatibleCost -X86TargetLowering::getNegatibleCost(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth) const { - // fneg patterns are removable even if they have multiple uses. - if (isFNEG(DAG, Op.getNode(), Depth)) - return NegatibleCost::Cheaper; - - // Don't recurse exponentially. - if (Depth > SelectionDAG::MaxRecursionDepth) - return NegatibleCost::Expensive; - - EVT VT = Op.getValueType(); - EVT SVT = VT.getScalarType(); - switch (Op.getOpcode()) { - case ISD::FMA: - case X86ISD::FMSUB: - case X86ISD::FNMADD: - case X86ISD::FNMSUB: - case X86ISD::FMADD_RND: - case X86ISD::FMSUB_RND: - case X86ISD::FNMADD_RND: - case X86ISD::FNMSUB_RND: { - if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || - !(SVT == MVT::f32 || SVT == MVT::f64) || - !isOperationLegal(ISD::FMA, VT)) - break; - - // This is always negatible for free but we might be able to remove some - // extra operand negations as well. - for (int i = 0; i != 3; ++i) { - NegatibleCost V = getNegatibleCost(Op.getOperand(i), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (V == NegatibleCost::Cheaper) - return V; - } - return NegatibleCost::Neutral; - } - case X86ISD::FRCP: - return getNegatibleCost(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1); - } - - return TargetLowering::getNegatibleCost(Op, DAG, LegalOperations, ForCodeSize, - Depth); -} - SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, + NegatibleCost &Cost, unsigned Depth) const { // fneg patterns are removable even if they have multiple uses. - if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) + if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) { + Cost = NegatibleCost::Cheaper; return DAG.getBitcast(Op.getValueType(), Arg); + } EVT VT = Op.getValueType(); EVT SVT = VT.getScalarType(); @@ -44168,13 +44123,9 @@ // This is always negatible for free but we might be able to remove some // extra operand negations as well. SmallVector NewOps(Op.getNumOperands(), SDValue()); - for (int i = 0; i != 3; ++i) { - NegatibleCost V = getNegatibleCost(Op.getOperand(i), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (V == NegatibleCost::Cheaper) - NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations, - ForCodeSize, Depth + 1); - } + for (int i = 0; i != 3; ++i) + NewOps[i] = getCheaperNegatedExpression( + Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1); bool NegA = !!NewOps[0]; bool NegB = !!NewOps[1]; @@ -44185,17 +44136,21 @@ for (int i = 0, e = Op.getNumOperands(); i != e; ++i) if (!NewOps[i]) NewOps[i] = Op.getOperand(i); + + Cost = (NegA || NegB || NegC) ? NegatibleCost::Cheaper + : NegatibleCost::Neutral; return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps); } case X86ISD::FRCP: - return DAG.getNode(Opc, SDLoc(Op), VT, - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1)); + if (SDValue NegOp0 = + getNegatedExpression(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Cost, Depth + 1)) + return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0); + break; } return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, - ForCodeSize, Depth); + ForCodeSize, Cost, Depth); } static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, @@ -45088,9 +45043,9 @@ auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); bool LegalOperations = !DCI.isBeforeLegalizeOps(); - if (TLI.getNegatibleCost(V, DAG, LegalOperations, CodeSize) == - TargetLowering::NegatibleCost::Cheaper) { - V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize); + if (SDValue NegV = TLI.getCheaperNegatedExpression(V, DAG, LegalOperations, + CodeSize)) { + V = NegV; return true; } // Look through extract_vector_elts. If it comes from an FNEG, create a @@ -45098,12 +45053,10 @@ if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(V.getOperand(1))) { SDValue Vec = V.getOperand(0); - if (TLI.getNegatibleCost(Vec, DAG, LegalOperations, CodeSize) == - TargetLowering::NegatibleCost::Cheaper) { - SDValue NegVal = - TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize); + if (SDValue NegV = TLI.getCheaperNegatedExpression( + Vec, DAG, LegalOperations, CodeSize)) { V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), - NegVal, V.getOperand(1)); + NegV, V.getOperand(1)); return true; } } @@ -45145,11 +45098,11 @@ bool LegalOperations = !DCI.isBeforeLegalizeOps(); SDValue N2 = N->getOperand(2); - if (TLI.getNegatibleCost(N2, DAG, LegalOperations, CodeSize) != - TargetLowering::NegatibleCost::Cheaper) - return SDValue(); - SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize); + SDValue NegN2 = + TLI.getCheaperNegatedExpression(N2, DAG, LegalOperations, CodeSize); + if (!NegN2) + return SDValue(); unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false); if (N->getNumOperands() == 4) diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll --- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -65,8 +65,8 @@ ; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l ; CHECK-NEXT: qvlfsx 0, 0, 3 ; CHECK-NEXT: qvfmuls 4, 3, 3 -; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2 -; CHECK-NEXT: qvfmadds 0, 2, 4, 0 +; CHECK-NEXT: qvfmsubs 2, 2, 0, 2 +; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0 ; CHECK-NEXT: qvfmuls 0, 3, 0 ; CHECK-NEXT: qvfmul 1, 1, 0 ; CHECK-NEXT: blr @@ -182,8 +182,8 @@ ; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l ; CHECK-NEXT: qvlfsx 0, 0, 3 ; CHECK-NEXT: qvfmuls 4, 3, 3 -; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2 -; CHECK-NEXT: qvfmadds 0, 2, 4, 0 +; CHECK-NEXT: qvfmsubs 2, 2, 0, 2 +; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0 ; CHECK-NEXT: qvfmuls 0, 3, 0 ; CHECK-NEXT: qvfmuls 1, 1, 0 ; CHECK-NEXT: blr @@ -408,8 +408,8 @@ ; CHECK-NEXT: addis 3, 2, .LCPI16_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI16_0@toc@l ; CHECK-NEXT: qvfmuls 4, 2, 2 -; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1 -; CHECK-NEXT: qvfmadds 0, 3, 4, 0 +; CHECK-NEXT: qvfmsubs 3, 1, 0, 1 +; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0 ; CHECK-NEXT: qvlfsx 3, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI16_2@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI16_2@toc@l @@ -435,8 +435,8 @@ ; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l ; CHECK-NEXT: qvfmuls 4, 2, 2 -; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1 -; CHECK-NEXT: qvfmadds 0, 3, 4, 0 +; CHECK-NEXT: qvfmsubs 3, 1, 0, 1 +; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0 ; CHECK-NEXT: qvlfsx 3, 0, 3 ; CHECK-NEXT: qvfmuls 0, 2, 0 ; CHECK-NEXT: qvfmuls 0, 0, 1 diff --git a/llvm/test/CodeGen/X86/neg_fp.ll b/llvm/test/CodeGen/X86/neg_fp.ll --- a/llvm/test/CodeGen/X86/neg_fp.ll +++ b/llvm/test/CodeGen/X86/neg_fp.ll @@ -54,3 +54,30 @@ %t18 = fadd double %t16, %t7 ret double %t18 } + +; This would crash because the negated expression for %sub4 +; creates a new use of %sub1 and that alters the negated cost + +define float @fdiv_extra_use_changes_cost(float %a0, float %a1, float %a2) nounwind { +; CHECK-LABEL: fdiv_extra_use_changes_cost: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss {{[0-9]+}}(%esp), %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm2 +; CHECK-NEXT: mulss %xmm0, %xmm2 +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: divss %xmm2, %xmm0 +; CHECK-NEXT: movss %xmm0, (%esp) +; CHECK-NEXT: flds (%esp) +; CHECK-NEXT: popl %eax +; CHECK-NEXT: retl + %sub1 = fsub fast float %a0, %a1 + %mul2 = fmul fast float %sub1, %a2 + %neg = fneg fast float %a0 + %add3 = fadd fast float %a1, %neg + %sub4 = fadd fast float %add3, %a2 + %div5 = fdiv fast float %sub4, %mul2 + ret float %div5 +}