diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10100,6 +10100,7 @@ // Generates UABD/SABD instruction. static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { + EVT VT = N->getValueType(0); SDValue AbsOp1 = N->getOperand(0); SDValue Op0, Op1; @@ -10112,10 +10113,14 @@ unsigned Opc0 = Op0.getOpcode(); // Check if the operands of the sub are (zero|sign)-extended. if (Opc0 != Op1.getOpcode() || - (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) + (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) { + // fold (abs (sub nsw x, y)) -> abds(x, y) + if (AbsOp1->getFlags().hasNoSignedWrap() && + TLI.isOperationLegalOrCustom(ISD::ABDS, VT)) + return DAG.getNode(ISD::ABDS, SDLoc(N), VT, Op0, Op1); return SDValue(); + } - EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll --- a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -409,8 +409,7 @@ define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) #0 { ; CHECK-LABEL: sabd_v16i8_nsw: ; CHECK: // %bb.0: -; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b -; CHECK-NEXT: abs v0.16b, v0.16b +; CHECK-NEXT: sabd v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %sub = sub nsw <16 x i8> %a, %b %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true) @@ -420,8 +419,7 @@ define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) #0 { ; CHECK-LABEL: sabd_v8i16_nsw: ; CHECK: // %bb.0: -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-NEXT: abs v0.8h, v0.8h +; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %sub = sub nsw <8 x i16> %a, %b %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true) @@ -431,8 +429,7 @@ define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) #0 { ; CHECK-LABEL: sabd_v4i32_nsw: ; CHECK: // %bb.0: -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: abs v0.4s, v0.4s +; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %sub = sub nsw <4 x i32> %a, %b %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)