diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14305,10 +14305,8 @@
       if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
         SDValue C = FMul.getOperand(0);
         SDValue D = FMul.getOperand(1);
-
-        DAG.MorphNodeTo(FMul.getNode(), PreferredFusedOpcode, FMul->getVTList(),
-                        {C, D, E});
-
+        SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+        DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
         return FMA;
       }
 
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -221,6 +221,22 @@
   ret float %t5
 }
 
+define amdgpu_ps float @fmac_sequence_innermost_fmul_sgpr(float inreg %a, float inreg %b, float inreg %c, float inreg %d, float inreg %e, float inreg %f, float %g) #0 {
+; GCN-LABEL: fmac_sequence_innermost_fmul_sgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_mac_f32_e64 v0, s2, s3
+; GCN-NEXT:    v_fmac_f32_e64 v0, s0, s1
+; GCN-NEXT:    v_fmac_f32_e64 v0, s4, s5
+; GCN-NEXT:    ; return to shader part epilog
+  %t0 = fmul fast float %a, %b
+  %t1 = fmul fast float %c, %d
+  %t2 = fadd fast float %t0, %t1
+  %t3 = fmul fast float %e, %f
+  %t4 = fadd fast float %t2, %t3
+  %t5 = fadd fast float %t4, %g
+  ret float %t5
+}
+
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
 declare float @llvm.maxnum.f32(float, float) #1
 