Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -359,21 +359,22 @@ bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; - bool UnsafeAlgebra : 1; bool NoNaNs : 1; bool NoInfs : 1; bool NoSignedZeros : 1; bool AllowReciprocal : 1; bool VectorReduction : 1; bool AllowContract : 1; + bool AproximateFuncs : 1; + bool AllowReassociation : 1; public: /// Default constructor turns off all optimization flags. SDNodeFlags() : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false), + Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), - AllowContract(false) {} + AllowContract(false), AproximateFuncs(false), AllowReassociation(false) {} /// Sets the state of the flags to the defined state. void setDefined() { AnyDefined = true; } @@ -393,10 +394,6 @@ setDefined(); Exact = b; } - void setUnsafeAlgebra(bool b) { - setDefined(); - UnsafeAlgebra = b; - } void setNoNaNs(bool b) { setDefined(); NoNaNs = b; @@ -421,18 +418,32 @@ setDefined(); AllowContract = b; } + void setAproximateFuncs(bool b) { + setDefined(); + AproximateFuncs = b; + } + void setAllowReassociation(bool b) { + setDefined(); + AllowReassociation = b; + } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } bool hasNoSignedWrap() const { return NoSignedWrap; } bool hasExact() const { return Exact; } - bool hasUnsafeAlgebra() const { return UnsafeAlgebra; } bool hasNoNaNs() const { return NoNaNs; } bool hasNoInfs() const { return NoInfs; } bool hasNoSignedZeros() const { return NoSignedZeros; } bool hasAllowReciprocal() const { return AllowReciprocal; } bool hasVectorReduction() const { return VectorReduction; } bool hasAllowContract() const { return AllowContract; } + bool hasAproximateFuncs() const { return AproximateFuncs; } + bool hasAllowReassociation() const { return AllowReassociation; } + + bool isFast() const { + return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && + AllowContract && AproximateFuncs && AllowReassociation; + } /// Clear any flags in this flag set that aren't also set in Flags. /// If the given Flags are undefined then don't do anything. @@ -442,13 +453,14 @@ NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; - UnsafeAlgebra &= Flags.UnsafeAlgebra; NoNaNs &= Flags.NoNaNs; NoInfs &= Flags.NoInfs; NoSignedZeros &= Flags.NoSignedZeros; AllowReciprocal &= Flags.AllowReciprocal; VectorReduction &= Flags.VectorReduction; AllowContract &= Flags.AllowContract; + AproximateFuncs &= Flags.AproximateFuncs; + AllowReassociation &= Flags.AllowReassociation; } }; @@ -923,6 +935,12 @@ const SDNodeFlags getFlags() const { return Flags; } void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; } + bool isFast() { + return Flags.hasNoSignedZeros() && Flags.hasAllowReciprocal() && + Flags.hasNoNaNs() && Flags.hasNoInfs() && + Flags.hasAllowContract() && Flags.hasAproximateFuncs() && + Flags.hasAllowReassociation(); + } /// Clear any flags in this node that aren't also set in Flags. /// If Flags is not in a defined state then this has no effect. Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -678,6 +678,8 @@ // Don't recurse exponentially. if (Depth > 6) return 0; + bool UnsafeFPMath = Options->UnsafeFPMath || Op->isFast(); + switch (Op.getOpcode()) { default: return false; case ISD::ConstantFP: { @@ -691,7 +693,7 @@ } case ISD::FADD: // FIXME: determine better conditions for this xform. - if (!Options->UnsafeFPMath) return 0; + if (!UnsafeFPMath) return 0; // After operation legalization, it might not be legal to create new FSUBs. if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) @@ -715,7 +717,7 @@ case ISD::FMUL: case ISD::FDIV: - if (Options->HonorSignDependentRoundingFPMath()) return 0; + if (Options->HonorSignDependentRoundingFPMathOption && !UnsafeFPMath) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, @@ -753,7 +755,7 @@ } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(Options.UnsafeFPMath); + assert(Options.UnsafeFPMath || Op->isFast()); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, @@ -6742,10 +6744,12 @@ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for // no signed zeros as well as no nans. + SDValue Cmp = N0.getOperand(2); const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && + bool UnsafeFPMath = Options.UnsafeFPMath || Cmp->isFast(); + if (UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { - ISD::CondCode CC = cast(N0.getOperand(2))->get(); + ISD::CondCode CC = cast(Cmp)->get(); if (SDValue FMinMax = combineMinNumMaxNum( DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) @@ -9542,7 +9546,7 @@ static bool isContractable(SDNode *N) { SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasUnsafeAlgebra(); + return F.hasAllowContract(); } /// Try to perform FMA combining on a given FADD node. @@ -10202,7 +10206,7 @@ } // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.isFast()) { // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. bool AllowNewConst = (Level < AfterLegalizeDAG); @@ -10337,7 +10341,7 @@ GetNegatedExpression(N1, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -10348,7 +10352,7 @@ } // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.isFast()) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) return N0; @@ -10413,7 +10417,7 @@ if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.isFast()) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) return N1; @@ -10542,6 +10546,9 @@ SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + // FMA nodes have flags that propagate to the created nodes. + const SDNodeFlags Flags = N->getFlags(); + // Constant fold FMA. if (isa(N0) && isa(N1) && @@ -10566,11 +10573,6 @@ !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); - // TODO: FMA nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); - if (Options.UnsafeFPMath) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && @@ -10646,7 +10648,7 @@ // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { - bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath || N->isFast(); const SDNodeFlags Flags = N->getFlags(); if (!UnsafeMath && !Flags.hasAllowReciprocal()) return SDValue(); @@ -10724,7 +10726,7 @@ if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || N->isFast()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. @@ -10833,17 +10835,15 @@ } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath) + SDNodeFlags Flags = N->getFlags(); + if (!DAG.getTarget().Options.UnsafeFPMath && !Flags.isFast()) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); - // TODO: FSQRT nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + // FSQRT nodes have flags that propagate to the created nodes. return buildSqrtEstimate(N0, Flags); } @@ -11138,10 +11138,14 @@ // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. - if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + if (DAG.getTarget().Options.UnsafeFPMath || N->isFast() || N0IsTrunc) { + const SDNodeFlags Flags = N->getFlags(); SDLoc DL(N); - return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), + SDValue Val = + DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + Val->setFlags(Flags); + return Val; } } Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3317,7 +3317,7 @@ break; case ISD::FP_TO_FP16: DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); - if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + if (!TLI.useSoftFloat() && (TM.Options.UnsafeFPMath || Node->isFast())) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); if ((SVT == MVT::f64 || SVT == MVT::f80) && Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4042,7 +4042,7 @@ break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + if ((getTarget().Options.UnsafeFPMath || Operand.getNode()->isFast()) && OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Operand.getNode()->getFlags()); @@ -4435,7 +4435,7 @@ case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) { + if (getTarget().Options.UnsafeFPMath || Flags.isFast()) { if (Opcode == ISD::FADD) { // x+0 --> x if (N2CFP && N2CFP->getValueAPF().isZero()) @@ -4811,7 +4811,7 @@ case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) + if (getTarget().Options.UnsafeFPMath || Flags.isFast()) return N2; break; case ISD::MUL: Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2762,7 +2762,8 @@ Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.isFast()); + Flags.setAproximateFuncs(FMF.approxFunc()); + Flags.setAllowReassociation(FMF.allowReassoc()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); @@ -4309,6 +4310,18 @@ } else Result = lowerRangeToAssertZExt(DAG, I, Result); + if (I.getType()->isFloatTy()) { + FastMathFlags FMF = I.getFastMathFlags(); + SDNodeFlags Flags; + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setAllowContract(FMF.allowContract()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setAproximateFuncs(FMF.approxFunc()); + Flags.setAllowReassociation(FMF.allowReassoc()); + Result->setFlags(Flags); + } setValue(&I, Result); } } @@ -5475,9 +5488,19 @@ case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } + FastMathFlags FMF = I.getFastMathFlags(); + SDNodeFlags Flags; + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setAllowContract(FMF.allowContract()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setAproximateFuncs(FMF.approxFunc()); + Flags.setAllowReassociation(FMF.allowReassoc()); setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); + getValue(I.getArgOperand(0)), Flags)); + return nullptr; } case Intrinsic::minnum: { Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -479,9 +479,6 @@ if (getFlags().hasExact()) OS << " exact"; - if (getFlags().hasUnsafeAlgebra()) - OS << " unsafe"; - if (getFlags().hasNoNaNs()) OS << " nnan"; @@ -497,6 +494,12 @@ if (getFlags().hasAllowContract()) OS << " contract"; + if (getFlags().hasAproximateFuncs()) + OS << " afn"; + + if (getFlags().hasAllowReassociation()) + OS << " reassoc"; + if (getFlags().hasVectorReduction()) OS << " vector-reduction"; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5077,7 +5077,7 @@ EVT VT = Operand.getValueType(); SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + Flags.setAllowReassociation(true); // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2) // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) @@ -5116,7 +5116,7 @@ EVT VT = Operand.getValueType(); SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + Flags.setAllowReassociation(true); // Newton reciprocal iteration: E * (2 - X * E) // AArch64 reciprocal iteration instruction: (2 - M * N) Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -5348,7 +5348,7 @@ EVT VT = Op.getValueType(); const SDNodeFlags Flags = Op->getFlags(); bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || - Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal(); + Flags.hasAllowReassociation() || Flags.hasAllowReciprocal(); if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) return SDValue(); @@ -6698,8 +6698,8 @@ const TargetOptions &Options = DAG.getTarget().Options; if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || - (N0->getFlags().hasUnsafeAlgebra() && - N1->getFlags().hasUnsafeAlgebra())) && + (N0->getFlags().hasAllowReassociation() && + N1->getFlags().hasAllowReassociation())) && isFMAFasterThanFMulAndFAdd(VT)) { return ISD::FMA; } Index: test/CodeGen/X86/fmf-flags.ll =================================================================== --- test/CodeGen/X86/fmf-flags.ll +++ test/CodeGen/X86/fmf-flags.ll @@ -7,9 +7,12 @@ define float @fast_recip_sqrt(float %x) { ; X64-LABEL: fast_recip_sqrt: ; X64: # %bb.0: -; X64-NEXT: sqrtss %xmm0, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: divss %xmm1, %xmm0 +; X64-NEXT: rsqrtss %xmm0, %xmm1 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm1 +; X64-NEXT: mulss %xmm1, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: fast_recip_sqrt: @@ -53,9 +56,9 @@ define double @not_so_fast_mul_add(double %x) { ; X64-LABEL: not_so_fast_mul_add: ; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: movsd {{.*}}(%rip), %xmm1 ; X64-NEXT: mulsd %xmm0, %xmm1 -; X64-NEXT: addsd %xmm1, %xmm0 +; X64-NEXT: mulsd {{.*}}(%rip), %xmm0 ; X64-NEXT: movsd %xmm1, {{.*}}(%rip) ; X64-NEXT: retq ; @@ -64,7 +67,9 @@ ; X86-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NEXT: fld %st(0) ; X86-NEXT: fmull {{\.LCPI.*}} -; X86-NEXT: fadd %st(0), %st(1) +; X86-NEXT: fxch %st(1) +; X86-NEXT: fmull {{\.LCPI.*}} +; X86-NEXT: fxch %st(1) ; X86-NEXT: fstpl mul1 ; X86-NEXT: retl %m = fmul double %x, 4.2 @@ -80,10 +85,14 @@ define float @not_so_fast_recip_sqrt(float %x) { ; X64-LABEL: not_so_fast_recip_sqrt: ; X64: # %bb.0: -; X64-NEXT: sqrtss %xmm0, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: divss %xmm1, %xmm0 -; X64-NEXT: movss %xmm1, {{.*}}(%rip) +; X64-NEXT: rsqrtss %xmm0, %xmm1 +; X64-NEXT: sqrtss %xmm0, %xmm2 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm1 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: movss %xmm2, {{.*}}(%rip) ; X64-NEXT: retq ; ; X86-LABEL: not_so_fast_recip_sqrt: Index: test/CodeGen/X86/fmf-propagation.ll =================================================================== --- test/CodeGen/X86/fmf-propagation.ll +++ test/CodeGen/X86/fmf-propagation.ll @@ -3,8 +3,6 @@ ; This tests the propagation of fast-math-flags from IR instructions to SDNodeFlags. -; FIXME: 'afn' and 'reassoc' were dropped. With 'fast', 'reassoc' got renamed to 'unsafe'. - ; CHECK-LABEL: Initial selection DAG: %bb.0 'fmf_transfer:' ; CHECK: t5: f32 = fadd nsz t2, t4 @@ -12,9 +10,9 @@ ; CHECK-NEXT: t7: f32 = fadd nnan t6, t4 ; CHECK-NEXT: t8: f32 = fadd ninf t7, t4 ; CHECK-NEXT: t9: f32 = fadd contract t8, t4 -; CHECK-NEXT: t10: f32 = fadd t9, t4 -; CHECK-NEXT: t11: f32 = fadd t10, t4 -; CHECK-NEXT: t12: f32 = fadd unsafe nnan ninf nsz arcp contract t11, t4 +; CHECK-NEXT: t10: f32 = fadd afn t9, t4 +; CHECK-NEXT: t11: f32 = fadd reassoc t10, t4 +; CHECK-NEXT: t12: f32 = fadd nnan ninf nsz arcp contract afn reassoc t11, t4 ; CHECK: Optimized lowered selection DAG: %bb.0 'fmf_transfer:'