Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4070,22 +4070,36 @@
                                        NegatibleCost &Cost,
                                        unsigned Depth = 0) const;
 
-  /// This is the helper function to return the newly negated expression only
-  /// when the cost is cheaper.
-  SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
-                                      bool LegalOps, bool OptForSize,
-                                      unsigned Depth = 0) const {
+  /// This is the helper function to return the newly negated expression only
+  /// when the cost is not more expensive than \p CostThreshold.
+  SDValue getCheaperOrNeutralNegatedExpression(
+      SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize,
+      const NegatibleCost CostThreshold = NegatibleCost::Neutral,
+      unsigned Depth = 0) const {
     NegatibleCost Cost = NegatibleCost::Expensive;
     SDValue Neg =
         getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
-    if (Neg && Cost == NegatibleCost::Cheaper)
+    if (!Neg)
+      return SDValue();
+
+    if (Cost <= CostThreshold)
       return Neg;
+
     // Remove the new created node to avoid the side effect to the DAG.
-    if (Neg && Neg->use_empty())
+    if (Neg->use_empty())
       DAG.RemoveDeadNode(Neg.getNode());
     return SDValue();
   }
 
+  /// This is the helper function to return the newly negated expression only
+  /// when the cost is cheaper.
+  SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                      bool LegalOps, bool OptForSize,
+                                      unsigned Depth = 0) const {
+    return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize,
+                                                NegatibleCost::Cheaper, Depth);
+  }
+
   /// This is the helper function to return the newly negated expression if
   /// the cost is not expensive.
   SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
@@ -4696,7 +4710,7 @@
                                             SelectionDAG &DAG) const;
 
   // Targets may override this function to collect operands from the CallInst
-  // and for example, lower them into the SelectionDAG operands.
+  // and, for example, lower them into the SelectionDAG operands.
   virtual void CollectTargetIntrinsicOperands(const CallInst &I,
                                               SmallVectorImpl<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
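Note on the refactoring above: getCheaperNegatedExpression keeps its old
behavior as a thin wrapper, so existing callers are unaffected, while new
callers may also accept neutral-cost negations. The sketch below shows the
intended calling pattern; it is illustrative only (tryNegatedOperand and the
surrounding fold are hypothetical, not part of the patch), but the
TargetLowering and SelectionDAG calls are the real APIs touched here:

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLowering.h"
  using namespace llvm;

  // Hypothetical combine step: request a negated form of Op that is no more
  // expensive than the original (NegatibleCost::Neutral is the default
  // threshold, so it is omitted here).
  static SDValue tryNegatedOperand(SDValue Op, SelectionDAG &DAG,
                                   const TargetLowering &TLI, bool LegalOps,
                                   bool ForCodeSize) {
    SDValue Neg = TLI.getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps,
                                                           ForCodeSize);
    if (!Neg) // On failure the helper has already erased any dead nodes.
      return SDValue();

    // Anchor the result so that creating more nodes cannot delete it; this is
    // why the DAGCombiner change below wraps NegTrue and NegRHS in
    // HandleSDNode before calling back into node creation.
    HandleSDNode NegHandle(Neg);
    // ... attempt the fold here; NegHandle.getValue() stays valid ...
    return NegHandle.getValue();
  }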
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -387,6 +387,10 @@
     SDValue PromoteExtend(SDValue Op);
     bool PromoteLoad(SDValue Op);
 
+    SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+                                SDValue RHS, SDValue True, SDValue False,
+                                ISD::CondCode CC);
+
     /// Call the node-specific routine that knows how to fold each
     /// particular type of node. If that doesn't do anything, try the
     /// target-specific DAG combines.
@@ -10392,21 +10396,20 @@
 }
 
 /// Generate Min/Max node
-static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
-                                   SDValue RHS, SDValue True, SDValue False,
-                                   ISD::CondCode CC, const TargetLowering &TLI,
-                                   SelectionDAG &DAG) {
+SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+                                         SDValue RHS, SDValue True,
+                                         SDValue False, ISD::CondCode CC) {
   if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
     return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
 
   // If we can't directly match this, try to see if we can pull an fneg out of
   // the select.
-  if (True.getOpcode() != ISD::FNEG)
+  SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
+      True, DAG, LegalOperations, ForCodeSize);
+  if (!NegTrue)
     return SDValue();
 
-  ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
-  ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
-  SDValue NegTrue = True.getOperand(0);
+  HandleSDNode NegTrueHandle(NegTrue);
 
   // Try to unfold an fneg from the select if we are comparing the negated
   // constant.
@@ -10414,14 +10417,17 @@
   // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
   //
   // TODO: Handle fabs
-  if (LHS == NegTrue && CFalse && CRHS) {
-    APFloat NegRHS = neg(CRHS->getValueAPF());
-    if (NegRHS == CFalse->getValueAPF()) {
-      SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
-                                                 False, CC, TLI, DAG);
-      if (Combined)
-        return DAG.getNode(ISD::FNEG, DL, VT, Combined);
-      return SDValue();
+  if (LHS == NegTrue) {
+    SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
+        RHS, DAG, LegalOperations, ForCodeSize);
+    if (NegRHS) {
+      HandleSDNode NegRHSHandle(NegRHS);
+      if (NegRHS == False) {
+        SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
+                                                   False, CC, TLI, DAG);
+        if (Combined)
+          return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+      }
     }
   }
 
@@ -10812,8 +10818,8 @@
   //
   // This is OK if we don't care what happens if either operand is a NaN.
   if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
-    if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
-                                              CC, TLI, DAG))
+    if (SDValue FMinMax =
+            combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
       return FMinMax;
 
   // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
@@ -11325,8 +11331,7 @@
   // NaN.
   //
   if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
-    if (SDValue FMinMax =
-            combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
+    if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
       return FMinMax;
   }
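The test updates below show the effect of the new path: a select between a
negated value and a negated constant now folds to a single min/max plus one
sign flip (on ARM, vminnm followed by an eor of the sign bit). As a scalar
analogue of the fold in the comment, select (setcc lt x, K), (fneg x),
(fneg K) equals fneg(minnum(x, K)) whenever NaNs and signed zeros can be
ignored, which is what the nnan/nsz flags in these tests guarantee. A
self-contained C++ check of that identity (illustrative only, not LLVM code):

  #include <algorithm>
  #include <cassert>
  #include <initializer_list>

  // Before the combine: the select picks one of the two negated values.
  static float beforeCombine(float x, float k) { return x < k ? -x : -k; }

  // After the combine: one min, then a single negation (a sign-bit flip).
  static float afterCombine(float x, float k) { return -std::min(x, k); }

  int main() {
    for (float x : {-16.0f, -4.0f, 0.0f, 2.0f, 4.0f, 32.0f})
      assert(beforeCombine(x, 4.0f) == afterCombine(x, 4.0f));
    return 0;
  }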
Index: llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -4488,10 +4488,7 @@
 ; GFX11-SAFE:       ; %bb.0:
 ; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SAFE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-SAFE-NEXT:    s_movk_i32 s0, 0x4000
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_mov_b32_e32 v1, s0
-; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v1, v3, v2
+; GFX11-SAFE-NEXT:    v_fma_f16 v1, v2, v3, 2.0
 ; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-SAFE-NEXT:    v_mul_f16_e64 v0, -v1, v4
 ; GFX11-SAFE-NEXT:    v_mul_f16_e64 v1, -v1, v5
Index: llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll
===================================================================
--- llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll
+++ llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll
@@ -67,13 +67,10 @@
 ; CHECK-NEXT:    vmov.f32 s0, #4.000000e+00
 ; CHECK-NEXT:    vmov s2, r0
 ; CHECK-NEXT:    vmov.f32 s4, #-8.000000e+00
-; CHECK-NEXT:    vmov.f32 s8, #8.000000e+00
-; CHECK-NEXT:    vsub.f32 s6, s0, s2
-; CHECK-NEXT:    vsub.f32 s0, s2, s0
-; CHECK-NEXT:    vcmp.f32 s4, s6
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s0, s8
+; CHECK-NEXT:    vsub.f32 s0, s0, s2
+; CHECK-NEXT:    vminnm.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    eor r0, r0, #-2147483648
 ; CHECK-NEXT:    mov pc, lr
   %sub.0 = fsub nnan nsz float 4.0, %a
   %sub.1 = fsub nnan nsz float %a, 4.0
@@ -88,13 +85,10 @@
 ; CHECK-NEXT:    vmov.f32 s0, #4.000000e+00
 ; CHECK-NEXT:    vmov s2, r0
 ; CHECK-NEXT:    vmov.f32 s4, #8.000000e+00
-; CHECK-NEXT:    vmov.f32 s8, #-8.000000e+00
-; CHECK-NEXT:    vsub.f32 s6, s0, s2
-; CHECK-NEXT:    vsub.f32 s0, s2, s0
-; CHECK-NEXT:    vcmp.f32 s4, s6
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s0, s8
+; CHECK-NEXT:    vsub.f32 s0, s0, s2
+; CHECK-NEXT:    vminnm.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    eor r0, r0, #-2147483648
 ; CHECK-NEXT:    mov pc, lr
   %sub.0 = fsub nnan nsz float 4.0, %a
   %sub.1 = fsub nnan nsz float %a, 4.0
@@ -108,15 +102,11 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov.f32 s0, #-4.000000e+00
 ; CHECK-NEXT:    vmov s2, r0
-; CHECK-NEXT:    vmov.f32 s6, #8.000000e+00
-; CHECK-NEXT:    vmov.f32 s4, #4.000000e+00
-; CHECK-NEXT:    vmov.f32 s8, #-8.000000e+00
+; CHECK-NEXT:    vmov.f32 s4, #8.000000e+00
 ; CHECK-NEXT:    vmul.f32 s0, s2, s0
-; CHECK-NEXT:    vmul.f32 s2, s2, s4
-; CHECK-NEXT:    vcmp.f32 s6, s0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s2, s8
+; CHECK-NEXT:    vminnm.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    eor r0, r0, #-2147483648
 ; CHECK-NEXT:    mov pc, lr
   %mul.0 = fmul nnan nsz float %a, 4.0
   %mul.1 = fmul nnan nsz float %a, -4.0
@@ -130,15 +120,11 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov.f32 s0, #-4.000000e+00
 ; CHECK-NEXT:    vmov s2, r0
-; CHECK-NEXT:    vmov.f32 s6, #-8.000000e+00
-; CHECK-NEXT:    vmov.f32 s4, #4.000000e+00
-; CHECK-NEXT:    vmov.f32 s8, #8.000000e+00
+; CHECK-NEXT:    vmov.f32 s4, #-8.000000e+00
 ; CHECK-NEXT:    vmul.f32 s0, s2, s0
-; CHECK-NEXT:    vmul.f32 s2, s2, s4
-; CHECK-NEXT:    vcmp.f32 s6, s0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s2, s8
+; CHECK-NEXT:    vminnm.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    eor r0, r0, #-2147483648
 ; CHECK-NEXT:    mov pc, lr
   %mul.0 = fmul nnan nsz float %a, 4.0
   %mul.1 = fmul nnan nsz float %a, -4.0
@@ -194,15 +180,11 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov.f32 s0, #4.000000e+00
 ; CHECK-NEXT:    vmov s2, r0
-; CHECK-NEXT:    vmov.f32 s6, #8.000000e+00
-; CHECK-NEXT:    vmov.f32 s4, #-4.000000e+00
-; CHECK-NEXT:    vmov.f32 s8, #-8.000000e+00
+; CHECK-NEXT:    vmov.f32 s4, #8.000000e+00
 ; CHECK-NEXT:    vmul.f32 s0, s2, s0
-; CHECK-NEXT:    vmul.f32 s2, s2, s4
-; CHECK-NEXT:    vcmp.f32 s6, s0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vselgt.f32 s0, s2, s8
+; CHECK-NEXT:    vminnm.f32 s0, s0, s4
 ; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    eor r0, r0, #-2147483648
 ; CHECK-NEXT:    mov pc, lr
   %mul.0 = fmul nnan nsz float %a, -4.0
   %mul.1 = fmul nnan nsz float %a, 4.0