Index: lib/Target/PowerPC/PPC.td =================================================================== --- lib/Target/PowerPC/PPC.td +++ lib/Target/PowerPC/PPC.td @@ -135,6 +135,9 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", "Enable VSX instructions", [FeatureAltivec]>; +def FeatureTwoConstNR : + SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true", + "Requires two constant Newton-Raphson computation">; def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", "Enable POWER8 Altivec instructions", [FeatureAltivec]>; @@ -227,7 +230,7 @@ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureBPERMD, FeatureExtDiv, - FeatureMFTB, DeprecatedDST]; + FeatureMFTB, DeprecatedDST, FeatureTwoConstNR]; list<SubtargetFeature> Power8SpecificFeatures = [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic, Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -10985,7 +10985,9 @@ if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); - UseOneConstNR = true; + // The Newton-Raphson computation with a single constant does not provide + // enough accuracy on some CPUs. 
+ UseOneConstNR = !Subtarget.needsTwoConstNR(); return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); Index: lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- lib/Target/PowerPC/PPCSubtarget.h +++ lib/Target/PowerPC/PPCSubtarget.h @@ -98,6 +98,7 @@ bool HasSPE; bool HasQPX; bool HasVSX; + bool NeedsTwoConstNR; bool HasP8Vector; bool HasP8Altivec; bool HasP8Crypto; @@ -246,6 +247,7 @@ bool hasFPU() const { return HasFPU; } bool hasQPX() const { return HasQPX; } bool hasVSX() const { return HasVSX; } + bool needsTwoConstNR() const { return NeedsTwoConstNR; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } bool hasP8Crypto() const { return HasP8Crypto; } Index: test/CodeGen/PowerPC/fma-mutate.ll =================================================================== --- test/CodeGen/PowerPC/fma-mutate.ll +++ test/CodeGen/PowerPC/fma-mutate.ll @@ -14,8 +14,6 @@ ret double %r ; CHECK: @foo3 -; CHECK: fmr [[REG:[0-9]+]], [[REG2:[0-9]+]] -; CHECK: xsnmsubadp [[REG]], {{[0-9]+}}, [[REG2]] ; CHECK: xsmaddmdp ; CHECK: xsmaddadp } Index: test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- test/CodeGen/PowerPC/fmf-propagation.ll +++ test/CodeGen/PowerPC/fmf-propagation.ll @@ -284,16 +284,16 @@ ; FMF-NEXT: fcmpu 0, 1, 0 ; FMF-NEXT: beq 0, .LBB10_2 ; FMF-NEXT: # %bb.1: +; FMF-NEXT: xsrsqrtesp 0, 1 ; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; FMF-NEXT: xsrsqrtesp 3, 1 -; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3) -; FMF-NEXT: xsmulsp 2, 1, 0 -; FMF-NEXT: xsmulsp 4, 3, 3 -; FMF-NEXT: xssubsp 2, 2, 1 -; FMF-NEXT: xsmulsp 2, 2, 4 -; FMF-NEXT: xssubsp 0, 0, 2 -; FMF-NEXT: xsmulsp 0, 3, 0 -; FMF-NEXT: xsmulsp 0, 0, 1 +; FMF-NEXT: addis 4, 2, .LCPI10_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI10_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: xsmulsp 0, 1, 0 +; 
FMF-NEXT: xsmulsp 1, 1, 2 +; FMF-NEXT: xsaddsp 0, 0, 3 +; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: .LBB10_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr @@ -304,16 +304,15 @@ ; GLOBAL-NEXT: fcmpu 0, 1, 0 ; GLOBAL-NEXT: beq 0, .LBB10_2 ; GLOBAL-NEXT: # %bb.1: -; GLOBAL-NEXT: xsrsqrtesp 2, 1 -; GLOBAL-NEXT: fneg 0, 1 +; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: lfs 3, .LCPI10_0@toc@l(3) -; GLOBAL-NEXT: xsmaddasp 4, 0, 3 -; GLOBAL-NEXT: xsmulsp 0, 2, 2 -; GLOBAL-NEXT: xsmaddasp 3, 4, 0 -; GLOBAL-NEXT: xsmulsp 0, 2, 3 -; GLOBAL-NEXT: xsmulsp 0, 0, 1 +; GLOBAL-NEXT: addis 4, 2, .LCPI10_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI10_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 1, 3 +; GLOBAL-NEXT: xsmulsp 0, 0, 2 ; GLOBAL-NEXT: .LBB10_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr @@ -338,16 +337,15 @@ ; FMF-NEXT: fcmpu 0, 1, 0 ; FMF-NEXT: beq 0, .LBB11_2 ; FMF-NEXT: # %bb.1: -; FMF-NEXT: xsrsqrtesp 2, 1 -; FMF-NEXT: fneg 0, 1 +; FMF-NEXT: xsrsqrtesp 0, 1 ; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; FMF-NEXT: fmr 4, 1 -; FMF-NEXT: lfs 3, .LCPI11_0@toc@l(3) -; FMF-NEXT: xsmaddasp 4, 0, 3 -; FMF-NEXT: xsmulsp 0, 2, 2 -; FMF-NEXT: xsmaddasp 3, 4, 0 -; FMF-NEXT: xsmulsp 0, 2, 3 -; FMF-NEXT: xsmulsp 0, 0, 1 +; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: xsmaddasp 2, 1, 0 +; FMF-NEXT: xsmulsp 0, 1, 3 +; FMF-NEXT: xsmulsp 0, 0, 2 ; FMF-NEXT: .LBB11_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr @@ -358,16 +356,15 @@ ; GLOBAL-NEXT: fcmpu 0, 1, 0 ; GLOBAL-NEXT: beq 0, .LBB11_2 ; GLOBAL-NEXT: # %bb.1: -; GLOBAL-NEXT: xsrsqrtesp 2, 1 -; GLOBAL-NEXT: fneg 0, 1 +; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: lfs 3, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: xsmaddasp 
4, 0, 3 -; GLOBAL-NEXT: xsmulsp 0, 2, 2 -; GLOBAL-NEXT: xsmaddasp 3, 4, 0 -; GLOBAL-NEXT: xsmulsp 0, 2, 3 -; GLOBAL-NEXT: xsmulsp 0, 0, 1 +; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 1, 3 +; GLOBAL-NEXT: xsmulsp 0, 0, 2 ; GLOBAL-NEXT: .LBB11_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr Index: test/CodeGen/PowerPC/recipest.ll =================================================================== --- test/CodeGen/PowerPC/recipest.ll +++ test/CodeGen/PowerPC/recipest.ll @@ -14,15 +14,16 @@ ret double %r ; CHECK: @foo -; CHECK-DAG: frsqrte -; CHECK-DAG: fnmsub +; CHECK: frsqrte ; CHECK: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK: blr ; CHECK-SAFE: @foo @@ -53,10 +54,10 @@ ; CHECK: @foof ; CHECK-DAG: frsqrtes -; CHECK-DAG: fnmsubs ; CHECK: fmuls ; CHECK-NEXT: fmadds ; CHECK-NEXT: fmuls +; CHECK-NEXT: fmuls ; CHECK-NEXT: fmul ; CHECK-NEXT: blr @@ -74,13 +75,14 @@ ; CHECK: @foo ; CHECK-DAG: frsqrte -; CHECK-DAG: fnmsub ; CHECK: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK-NEXT: frsp ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr @@ -98,11 +100,11 @@ ; CHECK: @goo ; CHECK-DAG: frsqrtes -; CHECK-DAG: fnmsubs ; CHECK: fmuls ; CHECK-NEXT: fmadds ; CHECK-NEXT: fmuls ; CHECK-NEXT: fmuls +; CHECK-NEXT: fmuls ; CHECK-NEXT: blr ; CHECK-SAFE: @goo @@ -138,7 +140,6 @@ ; CHECK-DAG: fres ; CHECK-DAG: fnmsubs ; CHECK-DAG: fmuls -; CHECK-DAG: fnmsubs ; CHECK-DAG: fmadds ; CHECK-DAG: fmadds ; CHECK: fmuls @@ -219,11 +220,11 @@ ; CHECK: @foo3 ; CHECK: fcmpu ; CHECK-DAG: frsqrte -; CHECK-DAG: fnmsub ; CHECK: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; 
CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul @@ -241,7 +242,6 @@ ; CHECK: @goo3 ; CHECK: fcmpu ; CHECK-DAG: frsqrtes -; CHECK-DAG: fnmsubs ; CHECK: fmuls ; CHECK-NEXT: fmadds ; CHECK-NEXT: fmuls Index: test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll =================================================================== --- test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll +++ test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll @@ -8,7 +8,7 @@ br i1 undef, label %for.body.lr.ph, label %for.end ; CHECK-LABEL: @LSH_recall_init -; CHECK: xsnmsubadp +; CHECK: xsmaddadp for.body.lr.ph: ; preds = %entry %conv3 = fpext float %W to double