// Patch summary (commentary only; the patch proper starts at the first
// "Index:" header below).
//
// 1. DAGCombiner.cpp: next to the existing isNegatibleForFree check, adds the
//    fold -(X-Y) -> (Y-X) for an FSUB operand N0. The fold is guarded three
//    ways: N0 must be an ISD::FSUB, no-signed-zeros must hold (either the
//    target option NoSignedZerosFPMath or the nsz flag read from N's flags),
//    and N0 must have exactly one use. The new FSUB is built with N's flags.
// 2. SelectionDAG.cpp: removes the same -(X-Y) -> (Y-X) fold, which had the
//    no-signed-zeros guard but no one-use check. The neighbouring
//    "--X -> X" fold is left in place.
// 3. Adds llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll.
//    Its two RUN lines differ only in -enable-no-signed-zeros-fp-math
//    (true / false) and share the same CHECK prefix, so both settings are
//    required to produce the same three v_add_f64 instructions.
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13350,6 +13350,16 @@ if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0. FIXME: This is + // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it + // was called from a context with a nsz flag if the input fsub does not. + if (N0.getOpcode() == ISD::FSUB && + (DAG.getTarget().Options.NoSignedZerosFPMath || + N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), + N0.getOperand(0), N->getFlags()); + } + + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. if (!TLI.isFNegFree(VT) && Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4688,11 +4688,6 @@ if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); - // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && - OpOpcode == ISD::FSUB) - return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), - Operand.getOperand(0), Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getOperand(0); break; Index: llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 
-enable-no-signed-zeros-fp-math=true < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-no-signed-zeros-fp-math=false < %s | FileCheck %s + +; no-signed-zeros-fp-math should not increase the number of +; instructions emitted. + +define { double, double } @testfn(double %arg, double %arg1, double %arg2) { +; CHECK-LABEL: testfn: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_add_f64 v[4:5], v[4:5], -v[0:1] +; CHECK-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3] +; CHECK-NEXT: v_add_f64 v[2:3], -v[2:3], -v[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] +bb: + %tmp = fsub fast double 0.000000e+00, %arg1 + %tmp3 = fsub fast double %arg2, %arg + %tmp4 = fadd fast double %tmp3, %tmp + %tmp5 = fsub fast double %tmp, %tmp3 + %tmp6 = insertvalue { double, double } undef, double %tmp4, 0 + %tmp7 = insertvalue { double, double } %tmp6, double %tmp5, 1 + ret { double, double } %tmp7 +}