Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20084,16 +20084,9 @@ // Newton iterations: Est = Est + Est (1 - Arg * Est) for (int i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } } return Est; @@ -20118,31 +20111,19 @@ // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); - AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. - if (!Reciprocal) { + if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - } return Est; } @@ -20168,13 +20149,8 @@ // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - AddToWorklist(AE.getNode()); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - AddToWorklist(AEE.getNode()); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); - AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20187,10 +20163,8 @@ // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } - AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); - AddToWorklist(Est.getNode()); } return Est; @@ -20247,16 +20221,11 @@ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - AddToWorklist(Fabs.getNode()); - AddToWorklist(IsDenorm.getNode()); - AddToWorklist(Est.getNode()); } else { // X == 0.0 ? 0.0 : Est SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - AddToWorklist(IsZero.getNode()); - AddToWorklist(Est.getNode()); } } } Index: llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll +++ llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll @@ -57,8 +57,6 @@ ret <4 x double> %r } -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -; an qvfmadd instead of a qvfnmsubs define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind { ; CHECK-LABEL: foof_fmf: ; CHECK: # %bb.0: # %entry @@ -66,12 +64,9 @@ ; CHECK-NEXT: qvfrsqrtes 3, 2 ; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l ; CHECK-NEXT: qvlfsx 0, 0, 3 -; CHECK-NEXT: addis 3, 2, .LCPI2_1@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI2_1@toc@l -; CHECK-NEXT: qvlfsx 4, 0, 3 -; CHECK-NEXT: qvfmadds 0, 2, 0, 2 -; CHECK-NEXT: qvfmuls 2, 3, 3 -; CHECK-NEXT: qvfmadds 0, 0, 2, 4 +; CHECK-NEXT: qvfmuls 4, 3, 3 +; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2 +; CHECK-NEXT: qvfmadds 0, 2, 4, 0 ; CHECK-NEXT: qvfmuls 0, 3, 0 ; CHECK-NEXT: qvfmul 1, 1, 0 ; CHECK-NEXT: blr @@ -179,8 +174,6 @@ ret <4 x float> %r } -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -; an qvfmadd instead of a qvfnmsubs define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind { ; CHECK-LABEL: goo_fmf: ; CHECK: # %bb.0: # %entry @@ -188,12 +181,9 @@ ; CHECK-NEXT: qvfrsqrtes 3, 2 ; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l ; CHECK-NEXT: qvlfsx 0, 0, 3 -; CHECK-NEXT: addis 3, 2, .LCPI6_1@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI6_1@toc@l -; CHECK-NEXT: qvlfsx 4, 0, 3 -; CHECK-NEXT: qvfmadds 0, 2, 0, 2 -; CHECK-NEXT: qvfmuls 2, 3, 3 -; CHECK-NEXT: qvfmadds 0, 0, 2, 4 +; CHECK-NEXT: qvfmuls 4, 3, 3 +; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2 +; CHECK-NEXT: qvfmadds 0, 2, 4, 0 ; CHECK-NEXT: qvfmuls 0, 3, 0 ; CHECK-NEXT: qvfmuls 1, 1, 0 ; CHECK-NEXT: blr @@ -360,8 +350,6 @@ ret <4 x double> %r } -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -; an qvfmadds instead of a qvfnmsubs define <4 x float> @goo3_fmf(<4 x float> %a) nounwind { ; CHECK-LABEL: goo3_fmf: ; CHECK: # %bb.0: # %entry @@ -369,14 +357,11 @@ ; CHECK-NEXT: qvfrsqrtes 2, 1 ; CHECK-NEXT: addi 3, 3, .LCPI14_1@toc@l ; CHECK-NEXT: qvlfsx 0, 0, 3 -; CHECK-NEXT: addis 3, 2, .LCPI14_2@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI14_2@toc@l -; CHECK-NEXT: qvlfsx 3, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI14_0@toc@ha -; CHECK-NEXT: qvfmuls 4, 2, 2 ; CHECK-NEXT: addi 3, 3, .LCPI14_0@toc@l -; CHECK-NEXT: qvfmadds 0, 1, 0, 1 -; CHECK-NEXT: qvfmadds 0, 0, 4, 3 +; CHECK-NEXT: qvfmuls 4, 2, 2 +; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1 +; CHECK-NEXT: qvfmadds 0, 3, 4, 0 ; CHECK-NEXT: qvlfsx 3, 0, 3 ; CHECK-NEXT: qvfmuls 0, 2, 0 ; CHECK-NEXT: qvfmuls 0, 0, 1