Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -4091,6 +4091,21 @@ return SDValue(); } + SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOps, bool OptForSize, + unsigned Depth = 0) const { + NegatibleCost Cost = NegatibleCost::Expensive; + SDValue Neg = + getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); + if (Neg && + (Cost == NegatibleCost::Cheaper || Cost == NegatibleCost::Neutral)) + return Neg; + // Remove the newly created node to avoid side effects on the DAG. + if (Neg && Neg->use_empty()) + DAG.RemoveDeadNode(Neg.getNode()); + return SDValue(); + } + /// This is the helper function to return the newly negated expression if /// the cost is not expensive. SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -387,6 +387,10 @@ SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, + ISD::CondCode CC, SelectionDAG &DAG); + /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. 
@@ -10356,21 +10360,21 @@ } /// Generate Min/Max node -static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, - SDValue RHS, SDValue True, SDValue False, - ISD::CondCode CC, const TargetLowering &TLI, - SelectionDAG &DAG) { +SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, + SDValue False, ISD::CondCode CC, + SelectionDAG &DAG) { if ((LHS == True && RHS == False) || (LHS == False && RHS == True)) return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG); // If we can't directly match this, try to see if we can pull an fneg out of // the select. - if (True.getOpcode() != ISD::FNEG) + SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression( + True, DAG, LegalOperations, ForCodeSize); + if (!NegTrue) return SDValue(); - ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); - ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False); - SDValue NegTrue = True.getOperand(0); + HandleSDNode NegTrueHandle(NegTrue); // Try to unfold an fneg from the select if we are comparing the negated // constant. @@ -10378,14 +10382,21 @@ // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K)) // // TODO: Handle fabs - if (LHS == NegTrue && CFalse && CRHS) { - APFloat NegRHS = neg(CRHS->getValueAPF()); - if (NegRHS == CFalse->getValueAPF()) { - SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue, - False, CC, TLI, DAG); + if (LHS == NegTrue) { + // The fold below requires False == -RHS, so check whether RHS has a + // cheap (or cost-neutral) negated form that we can compare against False. 
+ SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression( + RHS, DAG, LegalOperations, ForCodeSize); + if (NegRHS) { + HandleSDNode NegRHSHandle(NegRHS); + SDValue Combined; + if (NegRHS == False) { + Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue, False, CC, + TLI, DAG); + } + if (Combined) return DAG.getNode(ISD::FNEG, DL, VT, Combined); - return SDValue(); } } @@ -10776,8 +10787,8 @@ // // This is OK if we don't care what happens if either operand is a NaN. if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) - if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, - CC, TLI, DAG)) + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC, DAG)) return FMinMax; // Use 'unsigned add with overflow' to optimize an unsigned saturating add. @@ -11290,7 +11301,7 @@ // if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { if (SDValue FMinMax = - combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, DAG)) return FMinMax; } Index: llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll =================================================================== --- llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll +++ llvm/test/CodeGen/ARM/unsafe-fneg-select-minnum-maxnum-combine.ll @@ -67,13 +67,10 @@ ; CHECK-NEXT: vmov.f32 s0, #4.000000e+00 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmov.f32 s4, #-8.000000e+00 -; CHECK-NEXT: vmov.f32 s8, #8.000000e+00 -; CHECK-NEXT: vsub.f32 s6, s0, s2 -; CHECK-NEXT: vsub.f32 s0, s2, s0 -; CHECK-NEXT: vcmp.f32 s4, s6 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f32 s0, s0, s8 +; CHECK-NEXT: vsub.f32 s0, s0, s2 +; CHECK-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: eor r0, r0, #-2147483648 ; CHECK-NEXT: mov pc, lr %sub.0 = fsub nnan nsz float 4.0, %a %sub.1 = fsub nnan nsz float %a, 4.0 @@ -88,13 +85,10 @@ ; CHECK-NEXT: vmov.f32 s0, 
#4.000000e+00 ; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vmov.f32 s4, #8.000000e+00 -; CHECK-NEXT: vmov.f32 s8, #-8.000000e+00 -; CHECK-NEXT: vsub.f32 s6, s0, s2 -; CHECK-NEXT: vsub.f32 s0, s2, s0 -; CHECK-NEXT: vcmp.f32 s4, s6 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f32 s0, s0, s8 +; CHECK-NEXT: vsub.f32 s0, s0, s2 +; CHECK-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: eor r0, r0, #-2147483648 ; CHECK-NEXT: mov pc, lr %sub.0 = fsub nnan nsz float 4.0, %a %sub.1 = fsub nnan nsz float %a, 4.0 @@ -108,15 +102,11 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.f32 s0, #-4.000000e+00 ; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmov.f32 s6, #8.000000e+00 -; CHECK-NEXT: vmov.f32 s4, #4.000000e+00 -; CHECK-NEXT: vmov.f32 s8, #-8.000000e+00 +; CHECK-NEXT: vmov.f32 s4, #8.000000e+00 ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vmul.f32 s2, s2, s4 -; CHECK-NEXT: vcmp.f32 s6, s0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f32 s0, s2, s8 +; CHECK-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: eor r0, r0, #-2147483648 ; CHECK-NEXT: mov pc, lr %mul.0 = fmul nnan nsz float %a, 4.0 %mul.1 = fmul nnan nsz float %a, -4.0 @@ -130,15 +120,11 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.f32 s0, #-4.000000e+00 ; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmov.f32 s6, #-8.000000e+00 -; CHECK-NEXT: vmov.f32 s4, #4.000000e+00 -; CHECK-NEXT: vmov.f32 s8, #8.000000e+00 +; CHECK-NEXT: vmov.f32 s4, #-8.000000e+00 ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vmul.f32 s2, s2, s4 -; CHECK-NEXT: vcmp.f32 s6, s0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f32 s0, s2, s8 +; CHECK-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: eor r0, r0, #-2147483648 ; CHECK-NEXT: mov pc, lr %mul.0 = fmul nnan nsz float %a, 4.0 %mul.1 = fmul nnan nsz float %a, -4.0 @@ -194,15 +180,11 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.f32 s0, #4.000000e+00 ; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vmov.f32 s6, #8.000000e+00 -; 
CHECK-NEXT: vmov.f32 s4, #-4.000000e+00 -; CHECK-NEXT: vmov.f32 s8, #-8.000000e+00 +; CHECK-NEXT: vmov.f32 s4, #8.000000e+00 ; CHECK-NEXT: vmul.f32 s0, s2, s0 -; CHECK-NEXT: vmul.f32 s2, s2, s4 -; CHECK-NEXT: vcmp.f32 s6, s0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f32 s0, s2, s8 +; CHECK-NEXT: vminnm.f32 s0, s0, s4 ; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: eor r0, r0, #-2147483648 ; CHECK-NEXT: mov pc, lr %mul.0 = fmul nnan nsz float %a, -4.0 %mul.1 = fmul nnan nsz float %a, 4.0