diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11879,7 +11879,10 @@
     // fold (fsub x, (fma y, z, (fmul u, v)))
     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
+    // Require N1 to have a single use: the fold emits two new fused nodes,
+    // and if N1 stays live for another user they are added on top of it
+    // instead of replacing it.
     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
-        isContractableFMUL(N1.getOperand(2))) {
+        isContractableFMUL(N1.getOperand(2)) && N1->hasOneUse()) {
       SDValue N20 = N1.getOperand(2).getOperand(0);
       SDValue N21 = N1.getOperand(2).getOperand(1);
       return DAG.getNode(PreferredFusedOpcode, SL, VT,
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-excessive-fma.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-excessive-fma.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-excessive-fma.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 | FileCheck %s
+
+; Regression test for the DAGCombiner fold
+;   (fsub x, (fma y, z, (fmul u, v)))
+;     -> (fma (fneg y), z, (fma (fneg u), v, x)).
+; %.v1 and %.v2 each have several users, so the subtractions are expected to
+; stay v_sub_f32 instead of being folded into additional fma instructions.
+
+declare float @llvm.sqrt.f32(float) #0
+
+; CHECK-LABEL: {{^}}excess_fma:
+; CHECK: v_sub_f32_e32
+; CHECK: v_sub_f32_e32
+define amdgpu_cs float @excess_fma(float inreg %0, float inreg %1, float inreg %2, float inreg %3) local_unnamed_addr {
+  %.i185 = fmul reassoc nnan nsz arcp contract float %1, 0x3FF5F91700000000
+  %.i286 = fmul reassoc nnan nsz arcp contract float %2, 0x3FB9264240000000
+  %.v0 = fadd reassoc nnan nsz arcp contract float %3, %.i185
+  %.v1 = fadd reassoc nnan nsz arcp contract float %.i286, %.v0
+  %.i087 = fmul reassoc nnan nsz arcp contract float %0, 0x3E65798EE0000000
+  %.i188 = fmul reassoc nnan nsz arcp contract float %2, 0x3FEFB85660000000
+  %.v2 = fadd reassoc nnan nsz arcp contract float %.i087, %.i188
+  %.i0101 = fsub reassoc nnan nsz arcp contract float %.v2, %.v1
+  %.i1102 = fsub reassoc nnan nsz arcp contract float %.v1, %3
+  %.i2103 = fsub reassoc nnan nsz arcp contract float %3, %.v2
+  %.i0106 = fmul reassoc nnan nsz arcp contract float %.i0101, %.v2
+  %.i1107 = fmul reassoc nnan nsz arcp contract float %.i1102, %.v1
+  %.v3 = fadd reassoc nnan nsz arcp contract float %.i1107, %.i0106
+  %.v4 = fmul reassoc nnan nsz arcp contract float %3, %.i2103
+  %.v5 = fadd reassoc nnan nsz arcp contract float %.v4, %.v3
+  %.root = call reassoc nnan nsz arcp contract float @llvm.sqrt.f32(float %.v5)
+  ret float %.root
+}
+
+attributes #0 = { nounwind readnone speculatable willreturn }