Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7005,16 +7005,40 @@ OptForSize, Cost, Depth)) return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1)); break; - case ISD::SELECT: { - Cost = NegatibleCost::Cheaper; + case ISD::SELECT: + case ISD::VSELECT: { + // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS)) + // iff at least one cost is cheaper and the other is neutral/cheaper SDValue LHS = Op.getOperand(1); + NegatibleCost CostLHS = NegatibleCost::Expensive; + SDValue NegLHS = + getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth); + if (!NegLHS || CostLHS > NegatibleCost::Neutral) { + RemoveDeadNode(NegLHS); + break; + } + + // Prevent this node from being deleted by the next call. + Handles.emplace_back(NegLHS); + SDValue RHS = Op.getOperand(2); - if (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG) { - return DAG.getNode(ISD::SELECT, DL, VT, Op.getOperand(0), - LHS.getOperand(0), RHS.getOperand(0)); + NegatibleCost CostRHS = NegatibleCost::Expensive; + SDValue NegRHS = + getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth); + + // We're done with the handles. + Handles.clear(); + + if (!NegRHS || CostRHS > NegatibleCost::Neutral || + (CostLHS != NegatibleCost::Cheaper && + CostRHS != NegatibleCost::Cheaper)) { + RemoveDeadNode(NegLHS); + RemoveDeadNode(NegRHS); + break; } - break; + Cost = std::min(CostLHS, CostRHS); + return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS); } } Index: llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll =================================================================== --- llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll +++ llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll @@ -249,12 +249,11 @@ ; CHECK-LABEL: fadd_select_fneg_negk_f16: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.f16 s0, r1 -; CHECK-NEXT: vmov.f16 s2, #-4.000000e+00 -; CHECK-NEXT: vneg.f16 s0, s0 +; CHECK-NEXT: vmov.f16 s2, #4.000000e+00 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: vseleq.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 s2, r2 -; CHECK-NEXT: vadd.f16 s0, s0, s2 +; CHECK-NEXT: vsub.f16 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr %cmp = icmp eq i32 %arg0, 0 @@ -268,12 +267,11 @@ ; CHECK-LABEL: fadd_select_fneg_posk_f16: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.f16 s0, r1 -; CHECK-NEXT: vmov.f16 s2, #4.000000e+00 -; CHECK-NEXT: vneg.f16 s0, s0 +; CHECK-NEXT: vmov.f16 s2, #-4.000000e+00 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: vseleq.f16 s0, s0, s2 ; CHECK-NEXT: vmov.f16 s2, r2 -; CHECK-NEXT: vadd.f16 s0, s0, s2 +; CHECK-NEXT: vsub.f16 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr %cmp = icmp eq i32 %arg0, 0 @@ -286,41 +284,40 @@ define <8 x half> @fadd_vselect_fneg_posk_v8f16(<8 x i32> %arg0, <8 x half> %x, <8 x half> %y) { ; CHECK-LABEL: fadd_vselect_fneg_posk_v8f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov d0, r0, r1 -; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vmov.i8 q1, #0xff ; CHECK-NEXT: vmov d1, r2, r3 -; CHECK-NEXT: vldrw.u32 q3, [r0] +; CHECK-NEXT: add r0, sp, #16 ; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vmov.i8 q0, #0x0 -; CHECK-NEXT: vmov.i8 q1, #0xff -; CHECK-NEXT: add r0, sp, #32 ; CHECK-NEXT: vpsel q2, q1, q0 +; CHECK-NEXT: vldrw.u32 q3, [r0] +; CHECK-NEXT: vmov r2, r1, d4 +; CHECK-NEXT: add r12, sp, #32 +; CHECK-NEXT: vmov r4, r5, d5 +; CHECK-NEXT: vmov.16 q2[0], r2 +; CHECK-NEXT: vmov.16 q2[1], r1 ; CHECK-NEXT: vcmp.i32 eq, q3, zr ; CHECK-NEXT: vpsel q1, q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vmov r1, r0, d4 -; CHECK-NEXT: vmov r4, r5, d5 -; CHECK-NEXT: vmov.16 q2[0], r1 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r2, r3, d2 ; CHECK-NEXT: vmov.16 q2[2], r4 -; CHECK-NEXT: vmov lr, r12, d3 +; CHECK-NEXT: vmov r3, r0, d2 ; CHECK-NEXT: vmov.16 q2[3], r5 -; CHECK-NEXT: vneg.f16 q0, q0 -; CHECK-NEXT: vmov.16 q2[4], r2 -; CHECK-NEXT: vmov.i16 q1, #0x4400 -; CHECK-NEXT: vmov.16 q2[5], r3 +; CHECK-NEXT: vmov.16 q2[4], r3 +; CHECK-NEXT: vmov r6, lr, d3 +; CHECK-NEXT: vmov.16 q2[5], r0 +; CHECK-NEXT: vldrw.u32 q1, [r12] +; CHECK-NEXT: vmov.16 q2[6], r6 +; CHECK-NEXT: vmov.i16 q0, #0xc400 +; CHECK-NEXT: vmov.16 q2[7], lr ; CHECK-NEXT: add r0, sp, #48 -; CHECK-NEXT: vmov.16 q2[6], lr -; CHECK-NEXT: vmov.16 q2[7], r12 ; CHECK-NEXT: vcmp.i16 ne, q2, zr -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vadd.f16 q0, q0, q1 +; CHECK-NEXT: vsub.f16 q0, q1, q0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: pop {r4, r5, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} %cmp = icmp eq <8 x i32> %arg0, zeroinitializer %neg.x = fneg <8 x half> %x %select = select <8 x i1> %cmp, <8 x half> %neg.x, <8 x half>