diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11953,6 +11953,7 @@ // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros(); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. @@ -12116,7 +12117,7 @@ // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N1.getOperand(2)) && - N1->hasOneUse()) { + N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll --- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll @@ -357,8 +357,9 @@ ; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[Y]], [[Z]], [[TMP0]] ; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[TMP1]] -; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]] -; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]] +; SI-UNSAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]] +; SI-UNSAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[Y]], [[Z]], [[TMP0]] +; SI-UNSAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[TMP1]] ; SI: buffer_store_dwordx2 [[RESULT]] define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll --- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll @@ -537,8 +537,9 @@ ; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[C]] ; SI-STD-SAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP0]] -; SI-STD-UNSAFE: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]] -; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]] +; SI-STD-UNSAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]] +; SI-STD-UNSAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[C]] +; SI-STD-UNSAFE: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[A]], [[TMP0]] ; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[D]], [[E]] ; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]] diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll --- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll +++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll @@ -110,8 +110,9 @@ ; CHECK-SAFE-NEXT: blr ; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC2: -; CHECK-UNSAFE: fnmsub -; CHECK-UNSAFE-NEXT: fnmsub +; CHECK-UNSAFE: fmul +; CHECK-UNSAFE-NEXT: fmadd +; CHECK-UNSAFE-NEXT: fsub ; CHECK-UNSAFE-NEXT: blr ; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC2: @@ -121,6 +122,37 @@ ; CHECK-SAFE-VSX-NEXT: blr ; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC2: +; CHECK-UNSAFE-VSX: xsmuldp +; CHECK-UNSAFE-VSX-NEXT: xsmaddadp +; CHECK-UNSAFE-VSX-NEXT: xssubdp +; CHECK-UNSAFE-VSX-NEXT: blr +} + +define double @test_FMSUB_ASSOC2_NSZ(double %A, double %B, double %C, + double %D, double %E) { + %F = fmul nsz double %A, %B ; [#uses=1] + %G = fmul nsz double %C, %D ; [#uses=1] + %H = fadd nsz double %F, %G ; [#uses=1] + %I = fsub nsz double %E, %H ; [#uses=1] + ret double %I +; CHECK-SAFE-LABEL: test_FMSUB_ASSOC2_NSZ: +; CHECK-SAFE: fmul +; CHECK-SAFE-NEXT: fmadd +; CHECK-SAFE-NEXT: fsub +; CHECK-SAFE-NEXT: blr + +; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC2_NSZ: +; CHECK-UNSAFE: fnmsub +; CHECK-UNSAFE-NEXT: fnmsub +; CHECK-UNSAFE-NEXT: blr + +; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC2_NSZ: +; CHECK-SAFE-VSX: xsmuldp +; CHECK-SAFE-VSX-NEXT: xsmaddadp +; CHECK-SAFE-VSX-NEXT: xssubdp +; CHECK-SAFE-VSX-NEXT: blr + +; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC2_NSZ: ; CHECK-UNSAFE-VSX: xsnmsubmdp ; CHECK-UNSAFE-VSX-NEXT: xsnmsubadp ; CHECK-UNSAFE-VSX-NEXT: fmr