diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13252,13 +13252,24 @@
     }
   }
 
+  bool CanReassociate =
+      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+
+  auto isReassociatable = [Options](SDValue N) {
+    return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+  };
+
+  auto isFoldableFMUL = [isContractableFMUL, isReassociatable](SDValue N) {
+    return isContractableFMUL(N) && isReassociatable(N);
+  };
+
   // More folding opportunities when target permits.
-  if (Aggressive) {
+  if (Aggressive && CanReassociate) {
     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
     // fold (fsub (fma x, y, (fmul u, v)), z)
     //   -> (fma x, y (fma u, v, (fneg z)))
     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
-        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
+        isFoldableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
         N0.getOperand(2)->hasOneUse()) {
       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
                          N0.getOperand(1),
@@ -13271,8 +13282,7 @@
     // fold (fsub x, (fma y, z, (fmul u, v)))
     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
-        isContractableFMUL(N1.getOperand(2)) &&
-        N1->hasOneUse() && NoSignedZero) {
+        isFoldableFMUL(N1.getOperand(2)) && N1->hasOneUse() && NoSignedZero) {
       SDValue N20 = N1.getOperand(2).getOperand(0);
       SDValue N21 = N1.getOperand(2).getOperand(1);
       return DAG.getNode(
@@ -13282,7 +13292,6 @@
                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
     }
 
-
     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
     if (N0.getOpcode() == PreferredFusedOpcode &&
@@ -13290,7 +13299,7 @@
       SDValue N02 = N0.getOperand(2);
       if (N02.getOpcode() == ISD::FP_EXTEND) {
         SDValue N020 = N02.getOperand(0);
-        if (isContractableFMUL(N020) &&
+        if (isFoldableFMUL(N020) &&
             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                 N020.getValueType())) {
           return DAG.getNode(
@@ -13314,7 +13323,7 @@
       SDValue N00 = N0.getOperand(0);
       if (N00.getOpcode() == PreferredFusedOpcode) {
         SDValue N002 = N00.getOperand(2);
-        if (isContractableFMUL(N002) &&
+        if (isFoldableFMUL(N002) &&
             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                 N00.getValueType())) {
           return DAG.getNode(
@@ -13336,7 +13345,7 @@
         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
         N1->hasOneUse()) {
       SDValue N120 = N1.getOperand(2).getOperand(0);
-      if (isContractableFMUL(N120) &&
+      if (isFoldableFMUL(N120) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
@@ -13363,7 +13372,7 @@
       SDValue N100 = CvtSrc.getOperand(0);
       SDValue N101 = CvtSrc.getOperand(1);
       SDValue N102 = CvtSrc.getOperand(2);
-      if (isContractableFMUL(N102) &&
+      if (isFoldableFMUL(N102) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
--- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -309,10 +309,10 @@
 ; GFX9-F32DENORM-NEXT:    s_setpc_b64
 define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
 entry:
-  %mul = fmul half %u, %v
+  %mul = fmul reassoc half %u, %v
   %mul.ext = fpext half %mul to float
   %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
-  %add = fsub float %fma, %z
+  %add = fsub reassoc float %fma, %z
   ret float %add
 }
 
@@ -350,10 +350,10 @@
 ; GFX9-F32DENORM-NEXT:    s_setpc_b64
 define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
 entry:
-  %mul = fmul half %u, %v
+  %mul = fmul reassoc half %u, %v
   %mul.ext = fpext half %mul to float
   %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)
-  %add = fsub float %x, %fma
+  %add = fsub reassoc float %x, %fma
   ret float %add
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
--- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
@@ -187,14 +187,16 @@
 define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
 ; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmsub 0, 1, 2, 5
-; CHECK-NEXT:    fmadd 1, 3, 4, 0
+; CHECK-NEXT:    fmuls 0, 1, 2
+; CHECK-NEXT:    fmadd 0, 3, 4, 0
+; CHECK-NEXT:    fsub 1, 0, 5
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsmsubmdp 1, 2, 5
-; CHECK-VSX-NEXT:    xsmaddadp 1, 3, 4
+; CHECK-VSX-NEXT:    fmuls 0, 1, 2
+; CHECK-VSX-NEXT:    xsmaddadp 0, 3, 4
+; CHECK-VSX-NEXT:    xssubdp 1, 0, 5
 ; CHECK-VSX-NEXT:    blr
                                      double %D, double %E) {
   %F = fmul float %A, %B         ; [#uses=1]
@@ -208,15 +210,16 @@
 define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
 ; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmsub 0, 3, 4, 5
-; CHECK-NEXT:    fmadd 1, 1, 2, 0
+; CHECK-NEXT:    fmuls 0, 3, 4
+; CHECK-NEXT:    fmadds 0, 1, 2, 0
+; CHECK-NEXT:    fsub 1, 0, 5
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsmsubmdp 3, 4, 5
-; CHECK-VSX-NEXT:    xsmaddadp 3, 1, 2
-; CHECK-VSX-NEXT:    fmr 1, 3
+; CHECK-VSX-NEXT:    fmuls 0, 3, 4
+; CHECK-VSX-NEXT:    fmadds 0, 1, 2, 0
+; CHECK-VSX-NEXT:    xssubdp 1, 0, 5
 ; CHECK-VSX-NEXT:    blr
                                      float %D, double %E) {
   %F = fmul float %A, %B         ; [#uses=1]
@@ -230,18 +233,16 @@
 define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
 ; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fneg 0, 1
-; CHECK-NEXT:    fmadd 0, 0, 2, 5
-; CHECK-NEXT:    fneg 1, 3
-; CHECK-NEXT:    fmadd 1, 1, 4, 0
+; CHECK-NEXT:    fmuls 0, 1, 2
+; CHECK-NEXT:    fmadd 0, 3, 4, 0
+; CHECK-NEXT:    fsub 1, 5, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsnegdp 1, 1
-; CHECK-VSX-NEXT:    xsnegdp 0, 3
-; CHECK-VSX-NEXT:    xsmaddmdp 1, 2, 5
-; CHECK-VSX-NEXT:    xsmaddadp 1, 0, 4
+; CHECK-VSX-NEXT:    fmuls 0, 1, 2
+; CHECK-VSX-NEXT:    xsmaddadp 0, 3, 4
+; CHECK-VSX-NEXT:    xssubdp 1, 5, 0
 ; CHECK-VSX-NEXT:    blr
                                      double %D, double %E) {
   %F = fmul float %A, %B         ; [#uses=1]
@@ -255,19 +256,16 @@
 define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
 ; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fneg 0, 3
-; CHECK-NEXT:    fmadd 0, 0, 4, 5
-; CHECK-NEXT:    fneg 1, 1
-; CHECK-NEXT:    fmadd 1, 1, 2, 0
+; CHECK-NEXT:    fmuls 0, 3, 4
+; CHECK-NEXT:    fmadds 0, 1, 2, 0
+; CHECK-NEXT:    fsub 1, 5, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
 ; CHECK-VSX:       # %bb.0:
-; CHECK-VSX-NEXT:    xsnegdp 0, 3
-; CHECK-VSX-NEXT:    xsnegdp 1, 1
-; CHECK-VSX-NEXT:    xsmaddmdp 0, 4, 5
-; CHECK-VSX-NEXT:    xsmaddadp 0, 1, 2
-; CHECK-VSX-NEXT:    fmr 1, 0
+; CHECK-VSX-NEXT:    fmuls 0, 3, 4
+; CHECK-VSX-NEXT:    fmadds 0, 1, 2, 0
+; CHECK-VSX-NEXT:    xssubdp 1, 5, 0
 ; CHECK-VSX-NEXT:    blr
                                      float %D, double %E) {
   %F = fmul float %A, %B         ; [#uses=1]