Index: llvm/test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -277,12 +277,15 @@ ; GLOBALDEBUG: fmul afn {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' -define float @sqrt_afn(float %x) { -; FMF-LABEL: sqrt_afn: +define float @sqrt_afn_ieee(float %x) #0 { +; FMF-LABEL: sqrt_afn_ieee: ; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI10_2@toc@ha +; FMF-NEXT: fabs 0, 1 +; FMF-NEXT: lfs 2, .LCPI10_2@toc@l(3) +; FMF-NEXT: fcmpu 0, 0, 2 ; FMF-NEXT: xxlxor 0, 0, 0 -; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB10_2 +; FMF-NEXT: blt 0, .LBB10_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 ; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha @@ -298,11 +301,14 @@ ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; -; GLOBAL-LABEL: sqrt_afn: +; GLOBAL-LABEL: sqrt_afn_ieee: ; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI10_2@toc@ha +; GLOBAL-NEXT: fabs 0, 1 +; GLOBAL-NEXT: lfs 2, .LCPI10_2@toc@l(3) +; GLOBAL-NEXT: fcmpu 0, 0, 2 ; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB10_2 +; GLOBAL-NEXT: blt 0, .LBB10_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha @@ -320,6 +326,49 @@ ret float %rt } +define float @sqrt_afn_preserve_sign(float %x) #1 { +; FMF-LABEL: sqrt_afn_preserve_sign: +; FMF: # %bb.0: +; FMF-NEXT: xxlxor 0, 0, 0 +; FMF-NEXT: fcmpu 0, 1, 0 +; FMF-NEXT: beq 0, .LBB11_2 +; FMF-NEXT: # %bb.1: +; FMF-NEXT: xsrsqrtesp 0, 1 +; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: xsmulsp 0, 1, 0 +; FMF-NEXT: xsmulsp 1, 1, 2 +; FMF-NEXT: xsaddsp 0, 0, 3 +; FMF-NEXT: xsmulsp 0, 1, 0 +; FMF-NEXT: .LBB11_2: +; FMF-NEXT: fmr 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: sqrt_afn_preserve_sign: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xxlxor 0, 0, 0 +; GLOBAL-NEXT: fcmpu 0, 1, 0 +; GLOBAL-NEXT: beq 0, .LBB11_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: xsrsqrtesp 0, 1 +; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 1, 3 +; GLOBAL-NEXT: xsmulsp 0, 0, 2 +; GLOBAL-NEXT: .LBB11_2: +; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: blr + %rt = call afn float @llvm.sqrt.f32(float %x) + ret float %rt +} + ; The call is now fully 'fast'. This implies that approximation is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' @@ -330,42 +379,90 @@ ; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' -define float @sqrt_fast(float %x) { -; FMF-LABEL: sqrt_fast: +define float @sqrt_fast_ieee(float %x) #0 { +; FMF-LABEL: sqrt_fast_ieee: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI12_2@toc@ha +; FMF-NEXT: fabs 0, 1 +; FMF-NEXT: lfs 2, .LCPI12_2@toc@l(3) +; FMF-NEXT: fcmpu 0, 0, 2 +; FMF-NEXT: xxlxor 0, 0, 0 +; FMF-NEXT: blt 0, .LBB12_2 +; FMF-NEXT: # %bb.1: +; FMF-NEXT: xsrsqrtesp 0, 1 +; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI12_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI12_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI12_1@toc@l(4) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: xsmaddasp 2, 1, 0 +; FMF-NEXT: xsmulsp 0, 1, 3 +; FMF-NEXT: xsmulsp 0, 0, 2 +; FMF-NEXT: .LBB12_2: +; FMF-NEXT: fmr 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: sqrt_fast_ieee: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI12_2@toc@ha +; GLOBAL-NEXT: fabs 0, 1 +; GLOBAL-NEXT: lfs 2, .LCPI12_2@toc@l(3) +; GLOBAL-NEXT: fcmpu 0, 0, 2 +; GLOBAL-NEXT: xxlxor 0, 0, 0 +; GLOBAL-NEXT: blt 0, .LBB12_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: xsrsqrtesp 0, 1 +; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI12_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI12_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI12_1@toc@l(4) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 1, 3 +; GLOBAL-NEXT: xsmulsp 0, 0, 2 +; GLOBAL-NEXT: .LBB12_2: +; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: blr + %rt = call fast float @llvm.sqrt.f32(float %x) + ret float %rt +} + +define float @sqrt_fast_preserve_sign(float %x) #1 { +; FMF-LABEL: sqrt_fast_preserve_sign: ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB11_2 +; FMF-NEXT: beq 0, .LBB13_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI13_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI13_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI13_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmaddasp 2, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 3 ; FMF-NEXT: xsmulsp 0, 0, 2 -; FMF-NEXT: .LBB11_2: +; FMF-NEXT: .LBB13_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; -; GLOBAL-LABEL: sqrt_fast: +; GLOBAL-LABEL: sqrt_fast_preserve_sign: ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB11_2 +; GLOBAL-NEXT: beq 0, .LBB13_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI13_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI13_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI13_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: +; GLOBAL-NEXT: .LBB13_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call fast float @llvm.sqrt.f32(float %x) @@ -387,10 +484,10 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: xscmpudp 0, 1, 0 -; FMF-NEXT: blt 0, .LBB12_2 +; FMF-NEXT: blt 0, .LBB14_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: fmr 3, 2 -; FMF-NEXT: .LBB12_2: +; FMF-NEXT: .LBB14_2: ; FMF-NEXT: fmr 1, 3 ; FMF-NEXT: blr ; @@ -398,10 +495,10 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: xscmpudp 0, 1, 0 -; GLOBAL-NEXT: blt 0, .LBB12_2 +; GLOBAL-NEXT: blt 0, .LBB14_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: fmr 3, 2 -; GLOBAL-NEXT: .LBB12_2: +; GLOBAL-NEXT: .LBB14_2: ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr %cmp = fcmp nnan ult double %a, 0.0 @@ -477,3 +574,5 @@ ret float %add } +attributes #0 = { "denormal-fp-math"="ieee,ieee" } +attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" } Index: llvm/test/CodeGen/PowerPC/qpx-recipest.ll =================================================================== --- llvm/test/CodeGen/PowerPC/qpx-recipest.ll +++ llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -317,9 +317,13 @@ ; CHECK-NEXT: qvfnmsub 2, 4, 3, 2 ; CHECK-NEXT: qvfmul 0, 0, 2 ; CHECK-NEXT: qvlfdx 2, 0, 3 +; CHECK-NEXT: addis 3, 2, .LCPI12_2@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI12_2@toc@l +; CHECK-NEXT: qvlfdx 3, 0, 3 ; CHECK-NEXT: qvfmul 0, 0, 1 -; CHECK-NEXT: qvfcmpeq 1, 1, 2 -; CHECK-NEXT: qvfsel 1, 1, 2, 0 +; CHECK-NEXT: qvfabs 1, 1 +; CHECK-NEXT: qvfcmplt 1, 1, 2 +; CHECK-NEXT: qvfsel 1, 1, 3, 0 ; CHECK-NEXT: blr entry: %r = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) @@ -360,10 +364,14 @@ ; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1 ; CHECK-NEXT: qvfmadds 0, 3, 4, 0 ; CHECK-NEXT: qvlfsx 3, 0, 3 +; CHECK-NEXT: addis 3, 2, .LCPI14_2@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI14_2@toc@l +; CHECK-NEXT: qvlfsx 4, 0, 3 ; CHECK-NEXT: qvfmuls 0, 2, 0 +; CHECK-NEXT: qvfabs 2, 1 ; CHECK-NEXT: qvfmuls 0, 0, 1 -; CHECK-NEXT: qvfcmpeq 1, 1, 3 -; CHECK-NEXT: qvfsel 1, 1, 3, 0 +; CHECK-NEXT: qvfcmplt 1, 2, 3 +; CHECK-NEXT: qvfsel 1, 1, 4, 0 ; CHECK-NEXT: blr entry: %r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) Index: llvm/test/CodeGen/PowerPC/recipest.ll =================================================================== --- llvm/test/CodeGen/PowerPC/recipest.ll +++ llvm/test/CodeGen/PowerPC/recipest.ll @@ -292,7 +292,7 @@ ret float %r } -define <4 x float> @hoo3_fmf(<4 x float> %a) nounwind { +define <4 x float> @hoo3_fmf(<4 x float> %a) #1 { ; CHECK: @hoo3_fmf ; CHECK: vrsqrtefp ; CHECK-DAG: vcmpeqfp @@ -309,3 +309,4 @@ } attributes #0 = { nounwind "reciprocal-estimates"="sqrtf:0,sqrtd:0" } +attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }