Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14696,6 +14696,49 @@
                      Dot.getOperand(2));
 }
 
+static bool isNegatedInteger(const SDValue &Op) {
+  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
+}
+
+static SDValue getNegatedInteger(const SDValue &Op, SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, Op);
+}
+
+// Try to fold
+//
+// (neg (csel X, Y)) -> (csel (neg X), (neg Y))
+//
+// This fold lets the csel be matched as a csneg without emitting a redundant
+// neg instruction; in particular it covers the negation of the csel expansion
+// that lowerABS produces for an abs node.
+static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() != ISD::SUB || !isNullConstant(N->getOperand(0)))
+    return SDValue();
+
+  SDValue CSel = N->getOperand(1);
+  if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
+    return SDValue();
+
+  SDValue N0 = CSel.getOperand(0);
+  SDValue N1 = CSel.getOperand(1);
+
+  // If neither operand is a negation, the fold is not worthwhile: it would
+  // introduce two extra negations while eliminating only one.
+  if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
+    return SDValue();
+
+  SDValue N0N = getNegatedInteger(N0, DAG);
+  SDValue N1N = getNegatedInteger(N1, DAG);
+
+  SDLoc DL(N);
+  EVT VT = CSel.getValueType();
+  return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
+                     CSel.getOperand(3));
+}
+
 // The basic add/sub long vector instructions have variants with "2" on the end
 // which act on the high-half of their inputs. They are normally matched by
 // patterns like:
@@ -14757,6 +14800,8 @@
     return Val;
   if (SDValue Val = performAddDotCombine(N, DAG))
     return Val;
+  if (SDValue Val = performNegCSelCombine(N, DAG))
+    return Val;
   return performAddSubLongCombine(N, DCI, DAG);
 }
 
@@ -17617,6 +17662,7 @@
   default:
     LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
     break;
+  case ISD::ADD:
   case ISD::SUB:
     return performAddSubCombine(N, DCI, DAG);
Index: llvm/test/CodeGen/AArch64/neg-abs.ll
===================================================================
--- llvm/test/CodeGen/AArch64/neg-abs.ll
+++ llvm/test/CodeGen/AArch64/neg-abs.ll
@@ -8,8 +8,7 @@
 ; CHECK-LABEL: neg_abs64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp x0, #0
-; CHECK-NEXT:    cneg x8, x0, mi
-; CHECK-NEXT:    neg x0, x8
+; CHECK-NEXT:    cneg x0, x0, pl
 ; CHECK-NEXT:    ret
   %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
   %neg = sub nsw i64 0, %abs
@@ -22,8 +21,7 @@
 ; CHECK-LABEL: neg_abs32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    cneg w8, w0, mi
-; CHECK-NEXT:    neg w0, w8
+; CHECK-NEXT:    cneg w0, w0, pl
 ; CHECK-NEXT:    ret
   %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
   %neg = sub nsw i32 0, %abs
Index: llvm/test/CodeGen/AArch64/neg-selects.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neg-selects.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-none-eabi %s -o - | FileCheck %s
+
+define i32 @neg_select_neg(i32 %a, i32 %b, i1 %bb) {
+; CHECK-LABEL: neg_select_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tst w2, #0x1
+; CHECK-NEXT:    csel w0, w0, w1, ne
+; CHECK-NEXT:    ret
+  %nega = sub i32 0, %a
+  %negb = sub i32 0, %b
+  %sel = select i1 %bb, i32 %nega, i32 %negb
+  %res = sub i32 0, %sel
+  ret i32 %res
+}
+
+define i32 @negneg_select_nega(i32 %a, i32 %b, i1 %bb) {
+; CHECK-LABEL: negneg_select_nega:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tst w2, #0x1
+; CHECK-NEXT:    csneg w0, w1, w0, eq
+; CHECK-NEXT:    ret
+  %nega = sub i32 0, %a
+  %sel = select i1 %bb, i32 %nega, i32 %b
+  %nsel = sub i32 0, %sel
+  %res = sub i32 0, %nsel
+  ret i32 %res
+}
+
+define i32 @neg_select_nega(i32 %a, i32 %b, i1 %bb) {
+; CHECK-LABEL: neg_select_nega:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tst w2, #0x1
+; CHECK-NEXT:    csneg w0, w0, w1, ne
+; CHECK-NEXT:    ret
+  %nega = sub i32 0, %a
+  %sel = select i1 %bb, i32 %nega, i32 %b
+  %res = sub i32 0, %sel
+  ret i32 %res
+}
+
+define i32 @neg_select_negb(i32 %a, i32 %b, i1 %bb) {
+; CHECK-LABEL: neg_select_negb:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tst w2, #0x1
+; CHECK-NEXT:    csneg w0, w1, w0, eq
+; CHECK-NEXT:    ret
+  %negb = sub i32 0, %b
+  %sel = select i1 %bb, i32 %a, i32 %negb
+  %res = sub i32 0, %sel
+  ret i32 %res
+}
+
+define i32 @neg_select_ab(i32 %a, i32 %b, i1 %bb) {
+; CHECK-LABEL: neg_select_ab:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tst w2, #0x1
+; CHECK-NEXT:    csel w8, w0, w1, ne
+; CHECK-NEXT:    neg w0, w8
+; CHECK-NEXT:    ret
+  %sel = select i1 %bb, i32 %a, i32 %b
+  %res = sub i32 0, %sel
+  ret i32 %res
+}
+
+define i32 @neg_select_nega_with_use(i32 %a, i32 %b, i1 %bb) {
+; CHECK-LABEL: neg_select_nega_with_use:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tst w2, #0x1
+; CHECK-NEXT:    neg w8, w0
+; CHECK-NEXT:    csneg w9, w1, w0, eq
+; CHECK-NEXT:    sub w0, w8, w9
+; CHECK-NEXT:    ret
+  %nega = sub i32 0, %a
+  %sel = select i1 %bb, i32 %nega, i32 %b
+  %nsel = sub i32 0, %sel
+  %res = add i32 %nsel, %nega
+  ret i32 %res
+}
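
For reviewers, a rough sketch of the rewrite performed by performNegCSelCombine on the neg_select_nega test above. The t0/t1 value names are made up for exposition and do not come from an actual SelectionDAG dump:

  // Before the combine: a csel whose true operand is a negation, itself negated.
  //   t0 = sub 0, a                           ; isNegatedInteger(t0) is true
  //   t1 = AArch64ISD::CSEL t0, b, cc, flags
  //   N  = sub 0, t1                          ; the node handled by performNegCSelCombine
  //
  // After the combine: the outer negation is pushed into both csel inputs.
  //   N  = AArch64ISD::CSEL (sub 0, t0), (sub 0, b), cc, flags
  //
  // (sub 0, t0) is (sub 0, (sub 0, a)), which the generic DAG combiner should
  // fold back to a, leaving CSEL a, (sub 0, b), cc, flags. That form is then
  // selected as the single csneg checked in neg_select_nega.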