diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15411,6 +15411,55 @@
                      CSel.getOperand(3));
 }
 
+// Return the negative of node N in the DAG if it already exists.
+static SDValue simplifyToNegative(SDValue N, SelectionDAG &DAG) {
+  // t2: v8i16 = AArch64ISD::CMHI t0, t1
+  // t3: v8i16 = BUILD_VECTOR Constant:i32<1>, ...
+  // t4: v8i16 = and t2, t3
+  if (N->getOpcode() == ISD::AND) {
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    if (LHS.getOpcode() == AArch64ISD::CMHI) {
+      KnownBits Known;
+      unsigned Depth = 0;
+      Known = DAG.computeKnownBits(RHS, Depth);
+      if (Known.isConstant() && Known.getConstant().isOne())
+        return LHS;
+    }
+    if (RHS.getOpcode() == AArch64ISD::CMHI) {
+      KnownBits Known;
+      unsigned Depth = 0;
+      Known = DAG.computeKnownBits(LHS, Depth);
+      if (Known.isConstant() && Known.getConstant().isOne())
+        return RHS;
+    }
+  }
+  return SDValue();
+}
+
+// Try to fold (add (-a) b) or (add a (-b)) -> (sub b a) or (sub a b)
+// in case it is profitable to do so.
+static SDValue performAddNegCombine(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Replace with a subtraction if LHS has only one use and we can find its
+  // negative in the DAG. This way, we can get rid of LHS.
+  if (LHS.hasOneUse())
+    if (SDValue NE = simplifyToNegative(LHS, DAG))
+      return DAG.getNode(ISD::SUB, SDLoc(N), VT, RHS, NE);
+
+  if (RHS.hasOneUse())
+    if (SDValue NE = simplifyToNegative(RHS, DAG))
+      return DAG.getNode(ISD::SUB, SDLoc(N), VT, LHS, NE);
+
+  return SDValue();
+}
+
 // The basic add/sub long vector instructions have variants with "2" on the end
 // which act on the high-half of their inputs. They are normally matched by
 // patterns like:
@@ -15541,6 +15590,8 @@
     return Val;
   if (SDValue Val = performNegCSelCombine(N, DAG))
     return Val;
+  if (SDValue Val = performAddNegCombine(N, DAG))
+    return Val;
 
   return performAddSubLongCombine(N, DCI, DAG);
 }
diff --git a/llvm/test/CodeGen/AArch64/add-negative.ll b/llvm/test/CodeGen/AArch64/add-negative.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/add-negative.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK
+
+define <8 x i16> @add_to_sub(<8 x i16> %0, <8 x i16> %1) {
+; CHECK-LABEL: add_to_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    cmhi v0.8h, v2.8h, v0.8h
+; CHECK-NEXT:    cmhi v1.8h, v2.8h, v1.8h
+; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %3 = icmp ult <8 x i16> %0, 
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  %5 = icmp ult <8 x i16> %1, 
+  %6 = zext <8 x i1> %5 to <8 x i16>
+  %7 = add nsw <8 x i16> %6, %4
+  ret <8 x i16> %7
+}
+
diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll
--- a/llvm/test/CodeGen/AArch64/minmax.ll
+++ b/llvm/test/CodeGen/AArch64/minmax.ll
@@ -123,10 +123,8 @@
 ; CHECK-LABEL: t12:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmhi v2.16b, v1.16b, v0.16b
-; CHECK-NEXT:    movi v3.16b, #1
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    and v1.16b, v2.16b, v3.16b
-; CHECK-NEXT:    add v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    sub v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %t1 = icmp ugt <16 x i8> %b, %a
   %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
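
The fold is sound because each lane of an AArch64ISD::CMHI result is either all-ones or zero, so masking the compare result with 1 yields exactly the arithmetic negation of that lane, and add(x, and(cmhi, 1)) therefore equals sub(x, cmhi). The standalone C++ sketch below (illustrative only, not part of the patch; the variable names are made up) checks that lane-wise identity on i16 values:

// Sketch: verify and(cmhi, 1) == -cmhi for the two values a CMHI lane can
// take, which is what lets (add x (and cmhi 1)) be rewritten as (sub x cmhi).
#include <cassert>
#include <cstdint>

int main() {
  for (int16_t CmhiLane : {int16_t(-1), int16_t(0)}) {  // all-ones or zero
    int16_t Masked = CmhiLane & 1;                       // and(cmhi, 1)
    assert(Masked == static_cast<int16_t>(-CmhiLane));   // equals -cmhi
    int16_t X = 42;                                      // arbitrary lane value
    assert(static_cast<int16_t>(X + Masked) ==
           static_cast<int16_t>(X - CmhiLane));          // add -> sub rewrite
  }
  return 0;
}

With the patch applied, running the RUN line from add-negative.ll (llc -verify-machineinstrs -o - -mtriple=aarch64-linux-gnu llvm/test/CodeGen/AArch64/add-negative.ll) should emit the two cmhi instructions followed by a single sub, as in the CHECK lines above.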