Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -670,7 +670,7 @@ SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef Ops); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef Ops); + ArrayRef Ops, const SDNodeFlags *Flags = nullptr); SDValue getNode(unsigned Opcode, SDLoc DL, ArrayRef ResultTys, ArrayRef Ops); SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -343,10 +343,12 @@ SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); - SDValue BuildReciprocalEstimate(SDValue Op); - SDValue BuildRsqrtEstimate(SDValue Op); - SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); - SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -622,12 +624,14 @@ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), + &cast(Op)->Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), - Op.getOperand(0)); + Op.getOperand(0), + &cast(Op)->Flags); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. assert(Options.UnsafeFPMath); @@ -639,7 +643,8 @@ // fold (fneg (fsub A, B)) -> (fsub B, A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0)); + Op.getOperand(1), Op.getOperand(0), + &cast(Op)->Flags); case ISD::FMUL: case ISD::FDIV: @@ -651,13 +656,15 @@ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), + &cast(Op)->Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, Depth+1)); + LegalOperations, Depth+1), + &cast(Op)->Flags); case ISD::FP_EXTEND: case ISD::FSIN: @@ -7855,6 +7862,7 @@ EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) @@ -7863,23 +7871,23 @@ // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations)); + GetNegatedExpression(N0, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { @@ -7895,7 +7903,9 @@ if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, + Flags), + Flags); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) @@ -7916,8 +7926,8 @@ // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) @@ -7925,8 +7935,8 @@ N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); } } @@ -7937,8 +7947,8 @@ // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) @@ -7946,8 +7956,8 @@ N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); } } @@ -7957,7 +7967,7 @@ if (!CFP && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, - N1, DAG.getConstantFP(3.0, DL, VT)); + N1, DAG.getConstantFP(3.0, DL, VT), Flags); } } @@ -7967,7 +7977,7 @@ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, - N0, DAG.getConstantFP(3.0, DL, VT)); + N0, DAG.getConstantFP(3.0, DL, VT), Flags); } } @@ -7977,8 +7987,8 @@ N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, DL, VT, - N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), + DAG.getConstantFP(4.0, DL, VT), Flags); } } } // enable-unsafe-fp-math @@ -8000,6 +8010,7 @@ EVT VT = N->getValueType(0); SDLoc dl(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8008,12 +8019,12 @@ // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { @@ -8064,6 +8075,7 @@ EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) { @@ -8074,12 +8086,12 @@ // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -8108,8 +8120,8 @@ // the second operand of the outer multiply are constants. if ((N1CFP && isConstOrConstSplatFP(N01)) || (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); } } } @@ -8122,14 +8134,14 @@ (N0.getOperand(0) == N0.getOperand(1)) && N0.hasOneUse()) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, DL, VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) @@ -8144,7 +8156,8 @@ if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } @@ -8174,10 +8187,12 @@ if (N1CFP && N1CFP->isZero()) return N2; } + // FIXME: FMA nodes should have fast-math-flags. + SDNodeFlags Flags; if (N0CFP && N0CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2, &Flags); if (N1CFP && N1CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2, &Flags); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (N0CFP && !N1CFP) @@ -8188,8 +8203,10 @@ N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && N2.getOperand(1).getOpcode() == ISD::ConstantFP) { + Flags.setUnsafeAlgebra(true); return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + &Flags), &Flags); } @@ -8197,9 +8214,11 @@ if (Options.UnsafeFPMath && N0.getOpcode() == ISD::FMUL && N1CFP && N0.getOperand(1).getOpcode() == ISD::ConstantFP) { + Flags.setUnsafeAlgebra(true); return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + &Flags), N2); } @@ -8207,30 +8226,34 @@ // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + return DAG.getNode(ISD::FADD, dl, VT, N0, N2, &Flags); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); AddToWorklist(RHSNeg.getNode()); - return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg, &Flags); } } // (fma x, c, x) -> (fmul x, (c+1)) - if (Options.UnsafeFPMath && N1CFP && N0 == N2) + if (Options.UnsafeFPMath && N1CFP && N0 == N2) { + Flags.setUnsafeAlgebra(true); return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT))); - + N1, DAG.getConstantFP(1.0, dl, VT), + &Flags), &Flags); + } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + Flags.setUnsafeAlgebra(true); return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT))); + N1, DAG.getConstantFP(-1.0, dl, VT), + &Flags), &Flags); + } - return SDValue(); } @@ -8273,17 +8296,15 @@ EVT VT = N->getValueType(0); SDLoc DL(N); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // FIXME: This optimization requires some level of fast-math, so the - // created reciprocal node should at least have the 'allowReciprocal' - // fast-math-flag set. - SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); + const SDNodeFlags *Flags = &cast(N)->Flags; + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); // Dividend / Divisor -> Dividend * Reciprocal for (auto *U : Users) { SDValue Dividend = U->getOperand(0); if (Dividend != FPOne) { SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, - Reciprocal); + Reciprocal, Flags); CombineTo(U, NewNode); } else if (U != Reciprocal.getNode()) { // In the absence of fast-math-flags, this user node is always the @@ -8302,6 +8323,7 @@ EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8310,7 +8332,7 @@ // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); if (Options.UnsafeFPMath) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. @@ -8329,28 +8351,30 @@ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, DL, VT, N0, - DAG.getConstantFP(Recip, DL, VT)); + DAG.getConstantFP(Recip, DL, VT), Flags); } // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, @@ -8367,18 +8391,18 @@ if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { - RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1)) { + if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } @@ -8390,7 +8414,8 @@ if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } @@ -8409,7 +8434,8 @@ // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, + &cast(N)->Flags); return SDValue(); } @@ -8418,14 +8444,18 @@ if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) return SDValue(); + // FIXME: FSQRT nodes should have fast-math-flags. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); if (!RV) return SDValue(); EVT VT = RV.getValueType(); SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. @@ -8840,9 +8870,10 @@ if (Level >= AfterLegalizeDAG && (TLI.isFPImmLegal(CVal, N->getValueType(0)) || TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) - return DAG.getNode( - ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N0.getOperand(1)), + &cast(N0)->Flags); } } @@ -13116,8 +13147,13 @@ VT = RVT; } } - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, - LHSOp, RHSOp); + SDValue FoldOp; + if (auto *FlagsNode = dyn_cast(N)) + FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, LHSOp, RHSOp, + &FlagsNode->Flags); + else + FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, LHSOp, RHSOp); + if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::Constant && FoldOp.getOpcode() != ISD::ConstantFP) @@ -13147,8 +13183,13 @@ if (SVN0->getMask().equals(SVN1->getMask())) { EVT VT = N->getValueType(0); SDValue UndefVector = LHS.getOperand(1); - SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - LHS.getOperand(0), RHS.getOperand(0)); + SDValue NewBinOp; + if (auto *FlagsNode = dyn_cast(N)) + NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS.getOperand(0), + RHS.getOperand(0), &FlagsNode->Flags); + else + NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS.getOperand(0), + RHS.getOperand(0)); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, &SVN0->getMask()[0]); @@ -13706,7 +13747,7 @@ return S; } -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13730,16 +13771,16 @@ // Newton iterations: Est = Est + Est (1 - Arg * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } } @@ -13756,31 +13797,32 @@ /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } return Est; @@ -13792,7 +13834,8 @@ /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -13800,25 +13843,25 @@ // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); AddToWorklist(HalfEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); AddToWorklist(Est.getNode()); } return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13830,8 +13873,8 @@ AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR ? - BuildRsqrtNROneConst(Op, Est, Iterations) : - BuildRsqrtNRTwoConst(Op, Est, Iterations); + BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : + BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); } return Est; } Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2420,6 +2420,8 @@ SDValue Op0, EVT DestVT, SDLoc dl) { + // FIXME: What optimization flags should be set here? + SDNodeFlags Flags; if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2464,7 +2466,7 @@ BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // subtract the bias - SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias, &Flags); // final result SDValue Result; // handle final rounding @@ -2504,8 +2506,8 @@ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, - TwoP84PlusTwoP52); - return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); + TwoP84PlusTwoP52, &Flags); + return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub, &Flags); } // Implementation of unsigned i64 to f32. @@ -2524,7 +2526,7 @@ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); - SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); + SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt, &Flags); // TODO: This really should be implemented using a branch rather than a // select. We happen to get lucky and machinesink does the right @@ -2561,10 +2563,10 @@ SDValue TwoP32 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, MVT::f64); - SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt, &Flags); SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); - SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); + SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2, &Flags); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, DAG.getIntPtrConstant(0, dl)); } @@ -2617,7 +2619,7 @@ FudgeInReg = Handle.getValue(); } - return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg, &Flags); } /// This function is responsible for legalizing a @@ -3090,9 +3092,10 @@ Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + SDNodeFlags Flags; False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, DAG.getNode(ISD::FSUB, dl, VT, - Node->getOperand(0), Tmp1)); + Node->getOperand(0), Tmp1, &Flags)); False = DAG.getNode(ISD::XOR, dl, NVT, False, DAG.getConstant(x, dl, NVT)); Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); @@ -3298,12 +3301,14 @@ case ISD::FCOPYSIGN: Results.push_back(ExpandFCOPYSIGN(Node)); break; - case ISD::FNEG: + case ISD::FNEG:{ // Expand Y = FNEG(X) -> Y = SUB -0.0, X Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); + SDNodeFlags Flags; Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, - Node->getOperand(0)); + Node->getOperand(0), &Flags); Results.push_back(Tmp1); + } break; case ISD::FABS: { // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). @@ -3527,8 +3532,12 @@ EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(Node)) + Flags = &FlagsNode->Flags; + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); } else { Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, @@ -4279,9 +4288,13 @@ case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FPOW: { + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(Node)) + Flags = &FlagsNode->Flags; + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); - Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Flags); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1341,10 +1341,11 @@ break; } + SDNodeFlags Flags; Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), - dl, MVT::ppcf128)); + dl, MVT::ppcf128), &Flags); Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); @@ -1511,13 +1512,14 @@ SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. + SDNodeFlags Flags; return DAG.getSelectCC(dl, N->getOperand(0), Tmp, DAG.getNode(ISD::ADD, dl, MVT::i32, DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, N->getOperand(0), - Tmp)), + Tmp, &Flags)), DAG.getConstant(0x80000000, dl, MVT::i32)), DAG.getNode(ISD::FP_TO_SINT, dl, @@ -1912,8 +1914,10 @@ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); - - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(N)) + Flags = &FlagsNode->Flags; + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Flags); } SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -415,8 +415,11 @@ else Operands[j] = Op.getOperand(j); } + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(Op.getNode())) + Flags = &FlagsNode->Flags; - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Flags); if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) @@ -1001,12 +1004,14 @@ // Convert hi and lo to floats // Convert the hi part back to the upper values + // FIXME: What optimization flags should be set here? + SDNodeFlags Flags; SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); - fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); + fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW, &Flags); SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); // Add the two halves - return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); + return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO, &Flags); } @@ -1014,8 +1019,10 @@ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + // FIXME: FNEG node should have fast-math-flags. + SDNodeFlags Flags; return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), - Zero, Op.getOperand(0)); + Zero, Op.getOperand(0), &Flags); } return DAG.UnrollVectorOp(Op.getNode()); } Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -140,8 +140,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(N)) + Flags = &FlagsNode->Flags; return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + LHS.getValueType(), LHS, RHS, Flags); } SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { @@ -703,9 +706,12 @@ SDValue RHSLo, RHSHi; GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); + unsigned Opcode = N->getOpcode(); + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(N)) + Flags = &FlagsNode->Flags; + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -2057,7 +2063,10 @@ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(N)) + Flags = &FlagsNode->Flags; + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { @@ -2068,6 +2077,10 @@ EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(N)) + Flags = &FlagsNode->Flags; + while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2077,7 +2090,7 @@ // Operation doesn't trap so just widen as normal. SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } // No legal vector version so unroll the vector operation and then widen. @@ -2107,7 +2120,7 @@ SDValue EOp2 = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; } @@ -2125,7 +2138,7 @@ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); + EOp1, EOp2, Flags); } CurNumElts = 0; } @@ -2215,6 +2228,9 @@ unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); + SDNodeFlags *Flags = nullptr; + if (auto *FlagsNode = dyn_cast(N)) + Flags = &FlagsNode->Flags; if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); @@ -2223,7 +2239,7 @@ if (InVTNumElts == WidenNumElts) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); - return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } } @@ -2244,7 +2260,7 @@ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); - return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } if (InVTNumElts % WidenNumElts == 0) { @@ -2254,7 +2270,7 @@ // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); - return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); } } @@ -2270,7 +2286,7 @@ if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else - Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); } SDValue UndefVal = DAG.getUNDEF(EltVT); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3134,9 +3134,11 @@ break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) - return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0)); + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + // FIXME: FNEG node has no fast-math-flags to propagate. + return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), + Operand.getNode()->getOperand(0), + &cast(Operand.getNode())->Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getNode()->getOperand(0); break; @@ -3284,8 +3286,27 @@ return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); } +static bool hasFMF(unsigned Opcode) { + switch (Opcode) { + case ISD::FADD: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FREM: + case ISD::FSUB: + return true; + default: + return false; + } +} + + SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { + + if (hasFMF(Opcode)) + assert(Flags && "got no flags"); + + ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); switch (Opcode) { @@ -5329,12 +5350,12 @@ } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef Ops) { + ArrayRef Ops, const SDNodeFlags *Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); - case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; } @@ -6822,7 +6843,11 @@ switch (N->getOpcode()) { default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); + if (auto *FlagsNode = dyn_cast(N)) + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, + &FlagsNode->Flags)); + else + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); break; case ISD::VSELECT: Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -79,7 +79,7 @@ cl::init(0)); static cl::opt -EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden, +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); // Limit the width of DAG chains. This is important in general to prevent @@ -3527,12 +3527,14 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, SelectionDAG &DAG) { + // FIXME: What opt flags should be set here? + SDNodeFlags Flags; // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); // FractionalPartOfX = t0 - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1, &Flags); // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode( @@ -3550,12 +3552,12 @@ // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304, dl)); + getF32Constant(DAG, 0x3e814304, dl), &Flags); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3f3c50c8, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e, dl)); + getF32Constant(DAG, 0x3f7f5e7e, dl), &Flags); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3566,15 +3568,15 @@ // // error 0.000107046256, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3, dl)); + getF32Constant(DAG, 0x3da235e3, dl), &Flags); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3e65b8f3, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07, dl)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + getF32Constant(DAG, 0x3f324b07, dl), &Flags); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X, &Flags); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd, dl)); + getF32Constant(DAG, 0x3f7ff8fd, dl), &Flags); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3587,24 +3589,24 @@ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e, dl)); + getF32Constant(DAG, 0x3924b03e, dl), &Flags); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3ab24b87, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17, dl)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + getF32Constant(DAG, 0x3c1d8c17, dl), &Flags); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X, &Flags); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d, dl)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + getF32Constant(DAG, 0x3d634a1d, dl), &Flags); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X, &Flags); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14, dl)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + getF32Constant(DAG, 0x3e75fe14, dl), &Flags); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X, &Flags); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234, dl)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + getF32Constant(DAG, 0x3f317234, dl), &Flags); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X, &Flags); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000, dl)); + getF32Constant(DAG, 0x3f800000, dl), &Flags); } // Add the exponent into the result in integer domain. @@ -3625,8 +3627,10 @@ // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe + // FIXME: What opt flags should be set here? + SDNodeFlags Flags; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b, dl)); + getF32Constant(DAG, 0x3fb8aa3b, dl), &Flags); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -3644,8 +3648,10 @@ // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + // FIXME: What opt flags should be set here? + SDNodeFlags Flags; SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218, dl)); + getF32Constant(DAG, 0x3f317218, dl), &Flags); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -3661,12 +3667,12 @@ // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbe74c456, dl)); + getF32Constant(DAG, 0xbe74c456, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3fb3a2b1, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3fb3a2b1, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29, dl)); + getF32Constant(DAG, 0x3f949a29, dl), &Flags); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3678,18 +3684,18 @@ // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbd67b6d6, dl)); + getF32Constant(DAG, 0xbd67b6d6, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ee4f4b8, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3ee4f4b8, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fbc278b, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3fbc278b, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40348e95, dl)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + getF32Constant(DAG, 0x40348e95, dl), &Flags); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X, &Flags); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a, dl)); + getF32Constant(DAG, 0x3fdef31a, dl), &Flags); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3703,27 +3709,27 @@ // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbc91e5ac, dl)); + getF32Constant(DAG, 0xbc91e5ac, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e4350aa, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3e4350aa, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f60d3e3, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3f60d3e3, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x4011cdf0, dl)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + getF32Constant(DAG, 0x4011cdf0, dl), &Flags); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X, &Flags); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x406cfd1c, dl)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + getF32Constant(DAG, 0x406cfd1c, dl), &Flags); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X, &Flags); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x408797cb, dl)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + getF32Constant(DAG, 0x408797cb, dl), &Flags); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X, &Flags); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab, dl)); + getF32Constant(DAG, 0x4006dcab, dl), &Flags); } - return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa, &Flags); } // No special expansion. @@ -3748,6 +3754,8 @@ // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. SDValue Log2ofMantissa; + // FIXME: What opt flags should be set here? + SDNodeFlags Flags; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3755,12 +3763,12 @@ // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbeb08fe0, dl)); + getF32Constant(DAG, 0xbeb08fe0, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x40019463, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x40019463, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d, dl)); + getF32Constant(DAG, 0x3fd6633d, dl), &Flags); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3772,18 +3780,18 @@ // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbda7262e, dl)); + getF32Constant(DAG, 0xbda7262e, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f25280b, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3f25280b, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x4007b923, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x4007b923, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40823e2f, dl)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + getF32Constant(DAG, 0x40823e2f, dl), &Flags); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X, &Flags); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c, dl)); + getF32Constant(DAG, 0x4020d29c, dl), &Flags); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3798,27 +3806,27 @@ // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbcd2769e, dl)); + getF32Constant(DAG, 0xbcd2769e, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e8ce0b9, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3e8ce0b9, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fa22ae7, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3fa22ae7, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40525723, dl)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + getF32Constant(DAG, 0x40525723, dl), &Flags); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X, &Flags); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x40aaf200, dl)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + getF32Constant(DAG, 0x40aaf200, dl), &Flags); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X, &Flags); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x40c39dad, dl)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + getF32Constant(DAG, 0x40c39dad, dl), &Flags); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X, &Flags); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c, dl)); + getF32Constant(DAG, 0x4042902c, dl), &Flags); } - return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa, &Flags); } // No special expansion. @@ -3831,12 +3839,14 @@ const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + // FIXME: What opt flags should be set here? + SDNodeFlags Flags; SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3e9a209a, dl)); + getF32Constant(DAG, 0x3e9a209a, dl), &Flags); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -3852,12 +3862,12 @@ // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbdd49a13, dl)); + getF32Constant(DAG, 0xbdd49a13, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f1c0789, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3f1c0789, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300, dl)); + getF32Constant(DAG, 0x3f011300, dl), &Flags); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3868,15 +3878,15 @@ // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3d431f31, dl)); + getF32Constant(DAG, 0x3d431f31, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ea21fb2, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3ea21fb2, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f6ae232, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3f6ae232, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3, dl)); + getF32Constant(DAG, 0x3f25f7c3, dl), &Flags); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3889,24 +3899,24 @@ // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3c5d51ce, dl)); + getF32Constant(DAG, 0x3c5d51ce, dl), &Flags); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e00685a, dl)); - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + getF32Constant(DAG, 0x3e00685a, dl), &Flags); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X, &Flags); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3efb6798, dl)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + getF32Constant(DAG, 0x3efb6798, dl), &Flags); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X, &Flags); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f88d192, dl)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + getF32Constant(DAG, 0x3f88d192, dl), &Flags); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X, &Flags); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fc4316c, dl)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + getF32Constant(DAG, 0x3fc4316c, dl), &Flags); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X, &Flags); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70, dl)); + getF32Constant(DAG, 0x3f57ce70, dl), &Flags); } - return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa, &Flags); } // No special expansion. @@ -3939,13 +3949,15 @@ } if (IsExp10) { + // FIXME: What opt flags should be set here? + SDNodeFlags Flags; // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OF10 3.3219281f // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, - getF32Constant(DAG, 0x40549a78, dl)); + getF32Constant(DAG, 0x40549a78, dl), &Flags); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -3981,23 +3993,25 @@ // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; + // FIXME: Intrinsics should have optimization flags. + SDNodeFlags Flags; while (Val) { if (Val & 1) { if (Res.getNode()) - Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); + Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare, &Flags); else Res = CurSquare; // 1.0*CurSquare. } CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), - CurSquare, CurSquare); + CurSquare, CurSquare, &Flags); Val >>= 1; } // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), - DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res, &Flags); return Res; } } @@ -4634,14 +4648,16 @@ getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { + // FIXME: Intrinsic calls should have fast-math-flags. + SDNodeFlags Flags; SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + getValue(I.getArgOperand(1)), &Flags); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), Mul, - getValue(I.getArgOperand(2))); + getValue(I.getArgOperand(2)), &Flags); setValue(&I, Add); } return nullptr; Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1077,14 +1077,15 @@ SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); + SDNodeFlags Flags; SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, DAG.getConstantFP(1.0f, DL, MVT::f32), - Op.getOperand(1)); + Op.getOperand(1), &Flags); SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, - Op.getOperand(3)); + Op.getOperand(3), &Flags); return DAG.getNode(ISD::FADD, DL, VT, - DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), - OneSubAC); + DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2), &Flags), + OneSubAC, &Flags); } /// \brief Generate Min/Max node @@ -1630,9 +1631,11 @@ // float fb = (float)ib; SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib); + SDNodeFlags Flags; // float fq = native_divide(fa, fb); SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT, - fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb)); + fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb), + &Flags); // fq = trunc(fq); fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq); @@ -1642,7 +1645,8 @@ // float fr = mad(fqneg, fb, fa); SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT, - DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa); + DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb, &Flags), + fa, &Flags); // int iq = (int)fq; SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq); @@ -1940,11 +1944,13 @@ SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); - SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); + SDNodeFlags Flags; + + SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, &Flags); SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y, &Flags); - return DAG.getNode(ISD::FSUB, SL, VT, X, Mul); + return DAG.getNode(ISD::FSUB, SL, VT, X, Mul, &Flags); } SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { @@ -1968,7 +1974,8 @@ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); - return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); + SDNodeFlags Flags; + return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add, &Flags); } static SDValue extractF64Exponent(SDValue Hi, SDLoc SL, SelectionDAG &DAG) { @@ -2045,9 +2052,11 @@ SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64); SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); - SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); - SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); + SDNodeFlags Flags; + SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign, &Flags); + SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign, &Flags); + SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51"); @@ -2073,8 +2082,9 @@ SDValue X = Op.getOperand(0); SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X); + SDNodeFlags Flags; - SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T); + SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T, &Flags); SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff); @@ -2091,7 +2101,7 @@ SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero); - return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel); + return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel, &Flags); } SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const { @@ -2184,7 +2194,8 @@ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); - return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); + SDNodeFlags Flags; + return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add, &Flags); } SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, @@ -2206,8 +2217,8 @@ SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, DAG.getConstant(32, SL, MVT::i32)); - - return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); + SDNodeFlags Flags; + return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo, &Flags); } SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, @@ -2231,9 +2242,11 @@ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, DAG.getConstant(1, DL, MVT::i32)); SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); + SDNodeFlags Flags; FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, - DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32 - return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); + DAG.getConstantFP(4294967296.0f, DL, MVT::f32), + &Flags); // 2^32 + return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi, &Flags); } SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, @@ -2257,9 +2270,9 @@ MVT::f64); SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL, MVT::f64); + SDNodeFlags Flags; + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0, &Flags); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0); - SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul); Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -946,11 +946,12 @@ EVT VT = Op.getValueType(); SDValue Arg = Op.getOperand(0); SDLoc DL(Op); + SDNodeFlags Flags; SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, DAG.getNode(ISD::FADD, DL, VT, DAG.getNode(ISD::FMUL, DL, VT, Arg, - DAG.getConstantFP(0.15915494309, DL, MVT::f32)), - DAG.getConstantFP(0.5, DL, MVT::f32))); + DAG.getConstantFP(0.15915494309, DL, MVT::f32), &Flags), + DAG.getConstantFP(0.5, DL, MVT::f32), &Flags)); unsigned TrigNode; switch (Op.getOpcode()) { case ISD::FCOS: @@ -964,12 +965,12 @@ } SDValue TrigVal = DAG.getNode(TrigNode, DL, VT, DAG.getNode(ISD::FADD, DL, VT, FractPart, - DAG.getConstantFP(-0.5, DL, MVT::f32))); + DAG.getConstantFP(-0.5, DL, MVT::f32), &Flags)); if (Gen >= AMDGPUSubtarget::R700) return TrigVal; // On R600 hw, COS/SIN input must be between -Pi and Pi. return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, - DAG.getConstantFP(3.14159265359, DL, MVT::f32)); + DAG.getConstantFP(3.14159265359, DL, MVT::f32), &Flags); } SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1023,6 +1023,8 @@ SDLoc DL(Op); unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); + SDNodeFlags Flags; + switch (IntrinsicID) { case Intrinsic::r600_read_ngroups_x: return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), @@ -1104,7 +1106,7 @@ case AMDGPUIntrinsic::AMDGPU_fract: case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1), - DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1))); + DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)), &Flags); case AMDGPUIntrinsic::SI_fs_constant: { SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3)); SDValue Glue = M0.getValue(1); @@ -1271,8 +1273,10 @@ if (Unsafe) { // Turn into multiply by the reciprocal. // x / y -> x * (1.0 / y) + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags); } return SDValue(); @@ -1308,14 +1312,15 @@ SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT); SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One); + SDNodeFlags Flags; - r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3); + r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3, &Flags); SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0, &Flags); - return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul); + return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul, &Flags); } SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const { @@ -1345,7 +1350,8 @@ SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X); SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3); + SDNodeFlags Flags; + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3, &Flags); SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Mul, DivScale1); @@ -1428,10 +1434,11 @@ SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Arg = Op.getOperand(0); + SDNodeFlags Flags; SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, DAG.getNode(ISD::FMUL, DL, VT, Arg, DAG.getConstantFP(0.5/M_PI, DL, - VT))); + VT), &Flags)); switch (Op.getOpcode()) { case ISD::FCOS: Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -6449,6 +6449,7 @@ static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { + SDNodeFlags Flags; // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); @@ -6465,7 +6466,7 @@ // without any newton steps. This requires that we use a weird bias // of 0xb000, however (again, this has been exhaustively tested). // float4 result = as_float4(as_int4(xf*recip) + 0xb000); - X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); + X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y, &Flags); X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); Y = DAG.getConstant(0xb000, dl, MVT::i32); Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); @@ -6479,6 +6480,7 @@ static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { + SDNodeFlags Flags; SDValue N2; // Convert to float. // float4 yf = vcvt_f32_s32(vmovl_s16(y)); @@ -6497,12 +6499,12 @@ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), N1, N2); - N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); + N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2, &Flags); // Because short has a smaller range than ushort, we can actually get away // with only a single newton step. This requires that we use a weird bias // of 89, however (again, this has been exhaustively tested). // float4 result = as_float4(as_int4(xf*recip) + 0x89); - N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); + N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2, &Flags); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); N1 = DAG.getConstant(0x89, dl, MVT::i32); N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); @@ -6551,6 +6553,7 @@ } static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { + SDNodeFlags Flags; EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); @@ -6604,16 +6607,16 @@ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); - N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); + N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2, &Flags); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); - N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); + N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2, &Flags); // Simply multiplying by the reciprocal estimate can leave us a few ulps // too low, so we add 2 ulps (exhaustive testing shows that this is enough, // and that it will never cause us to return an answer too large). // float4 result = as_float4(as_int4(xf*recip) + 2); - N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); + N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2, &Flags); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); N1 = DAG.getConstant(2, dl, MVT::i32); N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); Index: lib/Target/Mips/MipsSEISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsSEISelLowering.cpp +++ lib/Target/Mips/MipsSEISelLowering.cpp @@ -1786,9 +1786,12 @@ return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_fadd_w: - case Intrinsic::mips_fadd_d: + case Intrinsic::mips_fadd_d: { + // FIXME: Intrinsics should have fast-math-flags. + SDNodeFlags Flags; return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + Op->getOperand(2), &Flags); + } // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away case Intrinsic::mips_fceq_w: case Intrinsic::mips_fceq_d: @@ -1831,9 +1834,12 @@ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETUNE); case Intrinsic::mips_fdiv_w: - case Intrinsic::mips_fdiv_d: + case Intrinsic::mips_fdiv_d: { + // FIXME: Intrinsics should have fast-math-flags. + SDNodeFlags Flags; return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + Op->getOperand(2), &Flags); + } case Intrinsic::mips_ffint_u_w: case Intrinsic::mips_ffint_u_d: return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), @@ -1856,10 +1862,12 @@ } case Intrinsic::mips_fexp2_w: case Intrinsic::mips_fexp2_d: { + // FIXME: Intrinsics should have fast-math-flags. + SDNodeFlags Flags; EVT ResTy = Op->getValueType(0); return DAG.getNode( ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), - DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); + DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)), &Flags); } case Intrinsic::mips_flog2_w: case Intrinsic::mips_flog2_d: @@ -1869,15 +1877,21 @@ return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); case Intrinsic::mips_fmul_w: - case Intrinsic::mips_fmul_d: + case Intrinsic::mips_fmul_d: { + // FIXME: Intrinsics should have fast-math-flags. + SDNodeFlags Flags; return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + Op->getOperand(2), &Flags); + } case Intrinsic::mips_fmsub_w: case Intrinsic::mips_fmsub_d: { + // FIXME: Intrinsics should have fast-math-flags. + SDNodeFlags Flags; EVT ResTy = Op->getValueType(0); return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, - Op->getOperand(2), Op->getOperand(3))); + Op->getOperand(2), Op->getOperand(3), + &Flags), &Flags); } case Intrinsic::mips_frint_w: case Intrinsic::mips_frint_d: @@ -1886,9 +1900,12 @@ case Intrinsic::mips_fsqrt_d: return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_fsub_w: - case Intrinsic::mips_fsub_d: + case Intrinsic::mips_fsub_d: { + // FIXME: Intrinsics should have fast-math-flags. + SDNodeFlags Flags; return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + Op->getOperand(2), &Flags); + } case Intrinsic::mips_ftrunc_u_w: case Intrinsic::mips_ftrunc_u_d: return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -5996,7 +5996,11 @@ if (!DAG.getTarget().Options.NoInfsFPMath || !DAG.getTarget().Options.NoNaNsFPMath) return Op; - + // Propagate flags based on global settings? + SDNodeFlags Flags; + Flags.setNoInfs(true); + Flags.setNoNaNs(true); + ISD::CondCode CC = cast(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); @@ -6046,7 +6050,7 @@ case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6056,25 +6060,25 @@ DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -12000,7 +12000,9 @@ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, 16); SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); - SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); + // FIXME: What optimization flags should be set here? + SDNodeFlags Flags; + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1, &Flags); SDValue Result; if (Subtarget->hasSSE3()) { @@ -12011,7 +12013,7 @@ SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32, S2F, 0x4E, DAG); Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, - DAG.getBitcast(MVT::v2f64, Shuffle), Sub); + DAG.getBitcast(MVT::v2f64, Shuffle), Sub, &Flags); } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, @@ -12049,7 +12051,9 @@ DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); // Subtract the bias. - SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); + // FIXME: What optimization flags should be set here? + SDNodeFlags Flags; + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias, &Flags); // Handle final rounding. EVT DestVT = Op.getValueType(); @@ -12161,11 +12165,13 @@ // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); + // FIXME: What optimization flags should be set here? + SDNodeFlags Flags; SDValue FHigh = - DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); + DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd, &Flags); // return (float4) lo + fhi; SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); - return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); + return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh, &Flags); } SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, @@ -12280,7 +12286,9 @@ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, false, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. - SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); + // FIXME: What optimization flags should be set here? + SDNodeFlags Flags; + SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge, &Flags); return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0, dl)); } @@ -15503,8 +15511,14 @@ Mask, PassThru, Subtarget, DAG); } } - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, - Src1,Src2), + // FIXME: Intrinsics should have fast-math-flags. + if (isBinOpWithFlags(IntrData->Opc0)) { + SDNodeFlags Flags; + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, &Flags), + Mask, PassThru, Subtarget, DAG); + } + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK_RM: { @@ -18864,7 +18878,9 @@ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, DAG.getBitcast(MVT::v2i64, VBias)); Or = DAG.getBitcast(MVT::v2f64, Or); - SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); + // FIXME: What optimization flags should be set here? + SDNodeFlags Flags; + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias, &Flags); Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); return; }