Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4049,7 +4049,8 @@ break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Operand.getNode()->getFlags()); Index: test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- test/CodeGen/PowerPC/fmf-propagation.ll +++ test/CodeGen/PowerPC/fmf-propagation.ll @@ -36,6 +36,31 @@ ret float %add } +; -(X - Y) --> (Y - X) + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:' +; FMFDEBUG: fsub {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:' +; GLOBALDEBUG: fsub {{t[0-9]+}}, {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:' + +define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) { +; FMF-LABEL: fneg_fsub_nozeros_1: +; FMF: # %bb.0: +; FMF-NEXT: xssubsp 1, 2, 1 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fneg_fsub_nozeros_1: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xssubsp 1, 2, 1 +; GLOBAL-NEXT: blr + %neg = fsub float %x, %y + %add = fsub nsz float 0.0, %neg + ret float %add +} + ; This shouldn't change anything - the intermediate fmul result is now also flagged. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:' @@ -166,11 +191,11 @@ define float @fmul_fma_reassoc1(float %x) { ; FMF-LABEL: fmul_fma_reassoc1: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI6_0@toc@l +; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l ; FMF-NEXT: lfsx 0, 0, 3 -; FMF-NEXT: addis 3, 2, .LCPI6_1@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI6_1@toc@l +; FMF-NEXT: addis 3, 2, .LCPI7_1@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI7_1@toc@l ; FMF-NEXT: lfsx 2, 0, 3 ; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: xsmaddasp 0, 1, 2 @@ -179,8 +204,8 @@ ; ; GLOBAL-LABEL: fmul_fma_reassoc1: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI6_0@toc@l +; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; GLOBAL-NEXT: addi 3, 3, .LCPI7_0@toc@l ; GLOBAL-NEXT: lfsx 0, 0, 3 ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr @@ -202,11 +227,11 @@ define float @fmul_fma_reassoc2(float %x) { ; FMF-LABEL: fmul_fma_reassoc2: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l +; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI8_0@toc@l ; FMF-NEXT: lfsx 0, 0, 3 -; FMF-NEXT: addis 3, 2, .LCPI7_1@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI7_1@toc@l +; FMF-NEXT: addis 3, 2, .LCPI8_1@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI8_1@toc@l ; FMF-NEXT: lfsx 2, 0, 3 ; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: xsmaddasp 0, 1, 2 @@ -215,8 +240,8 @@ ; ; GLOBAL-LABEL: fmul_fma_reassoc2: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI7_0@toc@l +; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; GLOBAL-NEXT: addi 3, 3, .LCPI8_0@toc@l ; GLOBAL-NEXT: lfsx 0, 0, 3 ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr @@ -238,11 +263,11 @@ define float @fmul_fma_fast1(float %x) { ; FMF-LABEL: fmul_fma_fast1: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; FMF-NEXT: addi 3, 3, 
.LCPI8_0@toc@l +; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l ; FMF-NEXT: lfsx 0, 0, 3 -; FMF-NEXT: addis 3, 2, .LCPI8_1@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI8_1@toc@l +; FMF-NEXT: addis 3, 2, .LCPI9_1@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI9_1@toc@l ; FMF-NEXT: lfsx 2, 0, 3 ; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: xsmaddasp 0, 1, 2 @@ -251,8 +276,8 @@ ; ; GLOBAL-LABEL: fmul_fma_fast1: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI8_0@toc@l +; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; GLOBAL-NEXT: addi 3, 3, .LCPI9_0@toc@l ; GLOBAL-NEXT: lfsx 0, 0, 3 ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr @@ -274,11 +299,11 @@ define float @fmul_fma_fast2(float %x) { ; FMF-LABEL: fmul_fma_fast2: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l +; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI10_0@toc@l ; FMF-NEXT: lfsx 0, 0, 3 -; FMF-NEXT: addis 3, 2, .LCPI9_1@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI9_1@toc@l +; FMF-NEXT: addis 3, 2, .LCPI10_1@toc@ha +; FMF-NEXT: addi 3, 3, .LCPI10_1@toc@l ; FMF-NEXT: lfsx 2, 0, 3 ; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: xsmaddasp 0, 1, 2 @@ -287,8 +312,8 @@ ; ; GLOBAL-LABEL: fmul_fma_fast2: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI9_0@toc@l +; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; GLOBAL-NEXT: addi 3, 3, .LCPI10_0@toc@l ; GLOBAL-NEXT: lfsx 0, 0, 3 ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr @@ -317,20 +342,20 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB10_2 +; GLOBAL-NEXT: beq 0, .LBB11_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 2, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha ; GLOBAL-NEXT: fneg 0, 1 ; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: addi 3, 3, .LCPI10_0@toc@l +; GLOBAL-NEXT: addi 3, 3, .LCPI11_0@toc@l ; 
GLOBAL-NEXT: lfsx 3, 0, 3 ; GLOBAL-NEXT: xsmaddasp 4, 0, 3 ; GLOBAL-NEXT: xsmulsp 0, 2, 2 ; GLOBAL-NEXT: xsmaddasp 3, 4, 0 ; GLOBAL-NEXT: xsmulsp 0, 2, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 1 -; GLOBAL-NEXT: .LBB10_2: +; GLOBAL-NEXT: .LBB11_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call afn float @llvm.sqrt.f32(float %x) @@ -351,26 +376,26 @@ ; FMF-LABEL: sqrt_fast: ; FMF: # %bb.0: ; FMF-NEXT: xssqrtsp 1, 1 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: sqrt_fast: ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB11_2 +; GLOBAL-NEXT: beq 0, .LBB12_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 2, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha ; GLOBAL-NEXT: fneg 0, 1 ; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: addi 3, 3, .LCPI11_0@toc@l +; GLOBAL-NEXT: addi 3, 3, .LCPI12_0@toc@l ; GLOBAL-NEXT: lfsx 3, 0, 3 ; GLOBAL-NEXT: xsmaddasp 4, 0, 3 ; GLOBAL-NEXT: xsmulsp 0, 2, 2 ; GLOBAL-NEXT: xsmaddasp 3, 4, 0 ; GLOBAL-NEXT: xsmulsp 0, 2, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 1 -; GLOBAL-NEXT: .LBB11_2: +; GLOBAL-NEXT: .LBB12_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call fast float @llvm.sqrt.f32(float %x) @@ -392,10 +417,10 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: xscmpudp 0, 1, 0 -; FMF-NEXT: blt 0, .LBB12_2 +; FMF-NEXT: blt 0, .LBB13_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: fmr 3, 2 -; FMF-NEXT: .LBB12_2: +; FMF-NEXT: .LBB13_2: ; FMF-NEXT: fmr 1, 3 ; FMF-NEXT: blr ; @@ -403,10 +428,10 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: xscmpudp 0, 1, 0 -; GLOBAL-NEXT: blt 0, .LBB12_2 +; GLOBAL-NEXT: blt 0, .LBB13_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: fmr 3, 2 -; GLOBAL-NEXT: .LBB12_2: +; GLOBAL-NEXT: .LBB13_2: ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr %cmp = fcmp nnan ult double %a, 0.0