diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11953,6 +11953,7 @@
   // Always prefer FMAD to FMA for precision.
   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+  bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
 
   // Is the node an FMUL and contractable either due to global flags or
   // SDNodeFlags.
@@ -12116,7 +12117,7 @@
   //   -> (fma (fneg y), z, (fma (fneg u), v, x))
   if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
       isContractableFMUL(N1.getOperand(2)) &&
-      N1->hasOneUse()) {
+      N1->hasOneUse() && NoSignedZero) {
     SDValue N20 = N1.getOperand(2).getOperand(0);
     SDValue N21 = N1.getOperand(2).getOperand(1);
     return DAG.getNode(PreferredFusedOpcode, SL, VT,
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -376,9 +376,10 @@
   %u = load volatile double, double addrspace(1)* %gep.3
   %v = load volatile double, double addrspace(1)* %gep.4
 
-  %tmp0 = fmul double %u, %v
-  %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
-  %tmp2 = fsub double %x, %tmp1
+  ; nsz flag is needed since this combine may change sign of zero
+  %tmp0 = fmul nsz double %u, %v
+  %tmp1 = call nsz double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
+  %tmp2 = fsub nsz double %x, %tmp1
 
   store double %tmp2, double addrspace(1)* %gep.out
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -566,9 +566,10 @@
   %u = load volatile float, float addrspace(1)* %gep.3
   %v = load volatile float, float addrspace(1)* %gep.4
 
-  %tmp0 = fmul float %u, %v
-  %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
-  %tmp2 = fsub float %x, %tmp1
+  ; nsz flag is needed since this combine may change sign of zero
+  %tmp0 = fmul nsz float %u, %v
+  %tmp1 = call nsz float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
+  %tmp2 = fsub nsz float %x, %tmp1
 
   store float %tmp2, float addrspace(1)* %gep.out
   ret void
diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
--- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
@@ -110,8 +110,9 @@
 ; CHECK-SAFE-NEXT: blr
 
 ; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC2:
-; CHECK-UNSAFE: fnmsub
-; CHECK-UNSAFE-NEXT: fnmsub
+; CHECK-UNSAFE: fmul
+; CHECK-UNSAFE-NEXT: fmadd
+; CHECK-UNSAFE-NEXT: fsub
 ; CHECK-UNSAFE-NEXT: blr
 
 ; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC2:
@@ -121,6 +122,37 @@
 ; CHECK-SAFE-VSX-NEXT: blr
 
 ; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC2:
+; CHECK-UNSAFE-VSX: xsmuldp
+; CHECK-UNSAFE-VSX-NEXT: xsmaddadp
+; CHECK-UNSAFE-VSX-NEXT: xssubdp
+; CHECK-UNSAFE-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC2_NSZ(double %A, double %B, double %C,
+                                     double %D, double %E) {
+  %F = fmul nsz double %A, %B         ; <double> [#uses=1]
+  %G = fmul nsz double %C, %D         ; <double> [#uses=1]
+  %H = fadd nsz double %F, %G         ; <double> [#uses=1]
+  %I = fsub nsz double %E, %H         ; <double> [#uses=1]
+  ret double %I
+; CHECK-SAFE-LABEL: test_FMSUB_ASSOC2_NSZ:
+; CHECK-SAFE: fmul
+; CHECK-SAFE-NEXT: fmadd
+; CHECK-SAFE-NEXT: fsub
+; CHECK-SAFE-NEXT: blr
+
+; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC2_NSZ:
+; CHECK-UNSAFE: fnmsub
+; CHECK-UNSAFE-NEXT: fnmsub
+; CHECK-UNSAFE-NEXT: blr
+
+; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC2_NSZ:
+; CHECK-SAFE-VSX: xsmuldp
+; CHECK-SAFE-VSX-NEXT: xsmaddadp
+; CHECK-SAFE-VSX-NEXT: xssubdp
+; CHECK-SAFE-VSX-NEXT: blr
+
+; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC2_NSZ:
 ; CHECK-UNSAFE-VSX: xsnmsubmdp
 ; CHECK-UNSAFE-VSX-NEXT: xsnmsubadp
 ; CHECK-UNSAFE-VSX-NEXT: fmr
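
A standalone illustration (not part of the patch) of why the fold now requires no-signed-zeros: using the operand names from the DAG pattern (x, y, z, u, v) and input values chosen here purely for demonstration, the original expression x - (y*z + u*v) and the rewritten form (-y)*z + ((-u)*v + x) can disagree on the sign of a zero result. A minimal C++ sketch, assuming strict IEEE-754 semantics (compile without fast-math); the file name and values are hypothetical:

// nsz_demo.cpp (hypothetical): shows the sign-of-zero mismatch that motivates
// gating the (fsub x, (fma y, z, (fmul u, v))) fold on the nsz flag.
#include <cmath>
#include <cstdio>

int main() {
  // volatile only keeps the inputs as runtime values; every intermediate
  // result below is a zero, and only the signs differ between the two forms.
  volatile double x = -0.0, y = -0.0, z = 1.0, u = 0.0, v = 1.0;

  // Original form: x - (y*z + u*v)
  //   u*v = +0.0; fma(y, z, +0.0) = -0.0 + +0.0 = +0.0; -0.0 - +0.0 = -0.0
  double before = x - std::fma(y, z, u * v);

  // Rewritten form: (-y)*z + ((-u)*v + x)
  //   fma(-u, v, x) = -0.0 + -0.0 = -0.0; fma(-y, z, -0.0) = +0.0 + -0.0 = +0.0
  double after = std::fma(-y, z, std::fma(-u, v, x));

  // Prints: before = -0 (signbit 1), after = 0 (signbit 0)
  std::printf("before = %g (signbit %d), after = %g (signbit %d)\n",
              before, (int)std::signbit(before),
              after, (int)std::signbit(after));
  return 0;
}

Under the nsz fast-math flag (or the global NoSignedZerosFPMath target option), the two results are treated as interchangeable, which is exactly what the new NoSignedZero guard in DAGCombiner encodes.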