diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11936,13 +11936,24 @@ return SDValue(); }; - // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (SDValue V = tryToFoldXYSubZ(N0, N1)) - return V; - - // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) - if (SDValue V = tryToFoldXSubYZ(N0, N1)) - return V; + // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (isContractableFMUL(N0) && isContractableFMUL(N1) && + (N0.getNode()->use_size() > N1.getNode()->use_size())) { + // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b)) + if (SDValue V = tryToFoldXSubYZ(N0, N1)) + return V; + // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d))) + if (SDValue V = tryToFoldXYSubZ(N0, N1)) + return V; + } else { + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (SDValue V = tryToFoldXYSubZ(N0, N1)) + return V; + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + if (SDValue V = tryToFoldXSubYZ(N0, N1)) + return V; + } // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) && diff --git a/llvm/test/CodeGen/PowerPC/fma-precision.ll b/llvm/test/CodeGen/PowerPC/fma-precision.ll --- a/llvm/test/CodeGen/PowerPC/fma-precision.ll +++ b/llvm/test/CodeGen/PowerPC/fma-precision.ll @@ -5,10 +5,10 @@ define double @fsub1(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: fsub1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsmuldp 3, 4, 3 ; CHECK-NEXT: xsmuldp 0, 2, 1 -; CHECK-NEXT: xsmsubadp 3, 2, 1 -; CHECK-NEXT: xsmuldp 1, 0, 3 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: xsnmsubadp 1, 4, 3 +; CHECK-NEXT: xsmuldp 1, 0, 1 ; CHECK-NEXT: blr entry: %mul = fmul fast double %b, %a @@ -97,3 +97,67 @@ %add = 
fadd fast double %mul1, %mul ret double %add } + +define double @fma_multi_uses1(double %a, double %b, double %c, double %d, double* %p1, double* %p2, double* %p3) { +; CHECK-LABEL: fma_multi_uses1: +; CHECK: # %bb.0: +; CHECK-NEXT: xsmuldp 1, 1, 2 +; CHECK-NEXT: xsmuldp 0, 3, 4 +; CHECK-NEXT: stfd 1, 0(7) +; CHECK-NEXT: stfd 1, 0(8) +; CHECK-NEXT: xsnmsubadp 1, 3, 4 +; CHECK-NEXT: stfd 0, 0(9) +; CHECK-NEXT: blr + %ab = fmul fast double %a, %b + %cd = fmul fast double %c, %d + store double %ab, double* %p1 ; extra use of %ab + store double %ab, double* %p2 ; another extra use of %ab + store double %cd, double* %p3 ; extra use of %cd + %r = fsub fast double %ab, %cd + ret double %r +} + +define double @fma_multi_uses2(double %a, double %b, double %c, double %d, double* %p1, double* %p2, double* %p3) { +; CHECK-LABEL: fma_multi_uses2: +; CHECK: # %bb.0: +; CHECK-NEXT: xsmuldp 5, 1, 2 +; CHECK-NEXT: xsmuldp 0, 3, 4 +; CHECK-NEXT: stfd 5, 0(7) +; CHECK-NEXT: stfd 0, 0(8) +; CHECK-NEXT: stfd 0, 0(9) +; CHECK-NEXT: xsmsubadp 0, 1, 2 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: blr + %ab = fmul fast double %a, %b + %cd = fmul fast double %c, %d + store double %ab, double* %p1 ; extra use of %ab + store double %cd, double* %p2 ; extra use of %cd + store double %cd, double* %p3 ; another extra use of %cd + %r = fsub fast double %ab, %cd + ret double %r +} + +define double @fma_multi_uses3(double %a, double %b, double %c, double %d, double %f, double %g, double* %p1, double* %p2, double* %p3) { +; CHECK-LABEL: fma_multi_uses3: +; CHECK: # %bb.0: +; CHECK-NEXT: xsmuldp 0, 1, 2 +; CHECK-NEXT: xsmuldp 1, 5, 6 +; CHECK-NEXT: ld 3, 96(1) +; CHECK-NEXT: stfd 0, 0(9) +; CHECK-NEXT: stfd 0, 0(10) +; CHECK-NEXT: stfd 1, 0(3) +; CHECK-NEXT: xsnmsubadp 1, 3, 4 +; CHECK-NEXT: xsnmsubadp 0, 3, 4 +; CHECK-NEXT: xsadddp 1, 0, 1 +; CHECK-NEXT: blr + %ab = fmul fast double %a, %b + %cd = fmul fast double %c, %d + %fg = fmul fast double %f, %g + store double %ab, double* %p1 ; extra use of %ab + 
store double %ab, double* %p2 ; another extra use of %ab + store double %fg, double* %p3 ; extra use of %fg + %q = fsub fast double %fg, %cd ; After %r is folded, %cd has 1 use left vs. 2 uses of %fg, so %cd is folded here too and the def of %cd is removed. + %r = fsub fast double %ab, %cd ; %r is folded before %q: %ab has 3 uses and %cd has 2 uses, so %cd is folded. + %add = fadd fast double %r, %q + ret double %add +} diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -177,12 +177,11 @@ ; CHECK-NEXT: fmuls 1, 1, 0 ; CHECK-NEXT: fmadds 1, 1, 0, 4 ; CHECK-NEXT: fmuls 0, 0, 5 -; CHECK-NEXT: fres 5, 2 +; CHECK-NEXT: fmuls 0, 0, 1 +; CHECK-NEXT: fres 1, 2 ; CHECK-NEXT: fmuls 4, 0, 1 -; CHECK-NEXT: fmuls 4, 4, 5 -; CHECK-NEXT: fmuls 2, 2, 4 -; CHECK-NEXT: fmsubs 0, 0, 1, 2 -; CHECK-NEXT: fmadds 0, 5, 0, 4 +; CHECK-NEXT: fnmsubs 0, 2, 4, 0 +; CHECK-NEXT: fmadds 0, 1, 0, 4 ; CHECK-NEXT: fmuls 1, 3, 0 ; CHECK-NEXT: blr %x = call fast float @llvm.sqrt.f32(float %a)