Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14579,6 +14579,68 @@
                      DAG.getConstant(0, DL, MVT::i64));
 }
 
+/// Fold a scalar ADD of a constant-armed CSEL/CSNEG into a single CSINC.
+///
+/// With CSINC(x, y, cc) = cc ? x : y + 1 and CSNEG(x, y, cc) = cc ? x : -y:
+///   b + CSEL(c, 1, cc)   --> CSINC(b + c, b, cc)
+///   b + CSEL(1, c, cc)   --> CSINC(b + c, b, !cc)
+///   b + CSNEG(c, -1, cc) --> CSINC(b + c, b, cc)
+///   b + CSNEG(1, c, cc)  --> CSINC(b - c, b, !cc)
+///
+/// Returns the replacement node, or an empty SDValue if \p N does not match.
+static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  auto IsCondSelect = [](SDValue V) {
+    return V.getOpcode() == AArch64ISD::CSEL ||
+           V.getOpcode() == AArch64ISD::CSNEG;
+  };
+
+  // ADD is commutative, so the select may be either operand.
+  SDValue Sel = N->getOperand(0);
+  SDValue Base = N->getOperand(1);
+  if (!IsCondSelect(Sel)) {
+    std::swap(Sel, Base);
+    if (!IsCondSelect(Sel))
+      return SDValue();
+  }
+  if (!Sel.hasOneUse())
+    return SDValue();
+
+  auto *CTVal = dyn_cast<ConstantSDNode>(Sel.getOperand(0));
+  auto *CFVal = dyn_cast<ConstantSDNode>(Sel.getOperand(1));
+  if (!CTVal || !CFVal)
+    return SDValue();
+
+  // Value each arm contributes to the sum. CSNEG negates its false operand.
+  const APInt TrueC = CTVal->getAPIntValue();
+  const APInt FalseC = Sel.getOpcode() == AArch64ISD::CSNEG
+                           ? -CFVal->getAPIntValue()
+                           : CFVal->getAPIntValue();
+
+  // Use the condition the select actually tests (operand 2) rather than
+  // assuming a particular one. CSINC increments its second operand when the
+  // condition is false, so the arm equal to one must end up on that side.
+  auto CC = static_cast<AArch64CC::CondCode>(Sel.getConstantOperandVal(2));
+  APInt Addend = TrueC;
+  if (!FalseC.isOne()) {
+    if (!TrueC.isOne())
+      return SDValue();
+    Addend = FalseC;
+    CC = AArch64CC::getInvertedCondCode(CC);
+  }
+
+  SDLoc DL(N);
+  SDValue NewAdd =
+      DAG.getNode(ISD::ADD, DL, VT, Base, DAG.getConstant(Addend, DL, VT));
+  SDValue CCVal = DAG.getConstant(CC, DL, MVT::i32);
+  SDValue Flags = Sel.getOperand(3);
+
+  return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewAdd, Base, CCVal, Flags);
+}
+
 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
 static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
@@ -14663,6 +14725,8 @@
     return Val;
   if (SDValue Val = performAddDotCombine(N, DAG))
     return Val;
+  if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
+    return Val;
 
   return performAddSubLongCombine(N, DCI, DAG);
 }
Index: llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-- -o - < %s | FileCheck %s
+
+; Verify that we can fold csneg/csel into csinc instruction.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; int csinc1 (int a, int b) { return !a ? b+3 : b+1; }
+define dso_local i32 @csinc1(i32 %a, i32 %b) {
+; CHECK-LABEL: csinc1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    add w8, w1, #3
+; CHECK-NEXT:    csinc w0, w8, w1, eq
+; CHECK-NEXT:    ret
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  %cond.v = select i1 %tobool.not, i32 3, i32 1
+  %cond = add nsw i32 %cond.v, %b
+  ret i32 %cond
+}
+
+; int csinc2 (int a, int b) { return a ? b+3 : b+1; }
+define dso_local i32 @csinc2(i32 %a, i32 %b) {
+; CHECK-LABEL: csinc2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    add w8, w1, #3
+; CHECK-NEXT:    csinc w0, w8, w1, ne
+; CHECK-NEXT:    ret
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  %cond.v = select i1 %tobool.not, i32 1, i32 3
+  %cond = add nsw i32 %cond.v, %b
+  ret i32 %cond
+}
+
+; int csinc3 (int a, int b) { return !a ? b+1 : b-3; }
+define dso_local i32 @csinc3(i32 %a, i32 %b) {
+; CHECK-LABEL: csinc3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w1, #3
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    csinc w0, w8, w1, ne
+; CHECK-NEXT:    ret
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  %cond.v = select i1 %tobool.not, i32 1, i32 -3
+  %cond = add nsw i32 %cond.v, %b
+  ret i32 %cond
+}
+
+; int csinc4 (int a, int b) { return a ? b+1 : b-3; }
+define dso_local i32 @csinc4(i32 %a, i32 %b) {
+; CHECK-LABEL: csinc4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w1, #3
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    csinc w0, w8, w1, eq
+; CHECK-NEXT:    ret
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  %cond.v = select i1 %tobool.not, i32 -3, i32 1
+  %cond = add nsw i32 %cond.v, %b
+  ret i32 %cond
+}