Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -2732,10 +2732,12 @@ return SDValue(); } - /// Indicate whether this target prefers to combine the given number of FDIVs - /// with the same divisor. - virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const { - return false; + /// Indicate whether this target prefers to combine FDIVs with the same + /// divisor. If the transform should never be done, return zero. If the + /// transform should be done, return the minimum number of divisor uses + /// that must exist. + virtual unsigned combineRepeatedFPDivisors() const { + return 0; } /// Hooks for building estimates in place of slower divisions and square Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8247,23 +8247,29 @@ if (!DAG.getTarget().Options.UnsafeFPMath) return SDValue(); + // Skip if current node is a reciprocal. SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - - // Skip if current node is a reciprocal. if (N0CFP && N0CFP->isExactlyValue(1.0)) return SDValue(); + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. SDValue N1 = N->getOperand(1); - SmallVector<SDNode *, 4> Users; + unsigned MinUses = TLI.combineRepeatedFPDivisors(); + if (!MinUses || N1->use_size() < MinUses) + return SDValue(); // Find all FDIV users of the same divisor. 
+ SmallVector<SDNode *, 4> Users; for (auto *U : N1->uses()) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) Users.push_back(U); } - if (!TLI.combineRepeatedFPDivisors(Users.size())) + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. + if (Users.size() < MinUses) return SDValue(); EVT VT = N->getValueType(0); Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h @@ -477,7 +477,7 @@ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; unsigned getRegisterByName(const char* RegName, EVT VT, Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9427,10 +9427,10 @@ return true; } -bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { +unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const { // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are three or more FDIVs. 
- return NumUsers > 2; + return 3; } TargetLoweringBase::LegalizeTypeAction Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -853,7 +853,7 @@ bool &UseOneConstNR) const override; SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; }; Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9131,7 +9131,7 @@ return SDValue(); } -bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { +unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default @@ -9144,12 +9144,12 @@ // one FP pipeline) for three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: - return NumUsers > 2; + return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: - return NumUsers > 1; + return 2; } } Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -1124,7 +1124,7 @@ unsigned &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. 
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; }; namespace X86 { Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -13308,8 +13308,8 @@ /// This is because we still need one division to calculate the reciprocal and /// then we need two multiplies by that reciprocal as replacements for the /// original divisions. -bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { - return NumUsers > 1; +unsigned X86TargetLowering::combineRepeatedFPDivisors() const { + return 2; } static bool isAllOnes(SDValue V) {