Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -795,6 +795,13 @@
     return true;
   }
 
+  // By default, favour the fold (abs (sub nsw x, y)) -> abds(x, y) if ABDS is
+  // legal or custom. Some targets may want to limit ABDS to legal ops to
+  // prevent loss of sub_nsw pattern.
+  virtual bool preferABDToABS(EVT VT) const {
+    return isOperationLegalOrCustom(ISD::ABDS, VT);
+  }
+
   // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X))
   virtual bool preferScalarizeSplat(unsigned Opc) const { return true; }
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10507,9 +10507,7 @@
   if (Opc0 != Op1.getOpcode() ||
       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
     // fold (abs (sub nsw x, y)) -> abds(x, y)
-    // Limit this to legal ops to prevent loss of sub_nsw pattern.
-    if (AbsOp1->getFlags().hasNoSignedWrap() &&
-        TLI.isOperationLegal(ISD::ABDS, VT)) {
+    if (AbsOp1->getFlags().hasNoSignedWrap() && TLI.preferABDToABS(VT)) {
       SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
     }
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -1046,6 +1046,8 @@
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+    bool preferABDToABS(EVT VT) const override;
+
     /// Return true if the target has native support for
     /// the specified value type and it is 'desirable' to use the type for the
     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57000,6 +57000,11 @@
   return SDValue();
 }
 
+// Limit ABDS to legal ops to prevent loss of sub_nsw pattern.
+bool X86TargetLowering::preferABDToABS(EVT VT) const {
+  return isOperationLegal(ISD::ABDS, VT);
+}
+
 bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
   if (!isTypeLegal(VT))
     return false;
Index: llvm/test/CodeGen/AArch64/neon-saba.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neon-saba.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+attributes #0 = { "target-features"="+neon,+neon,+sve2" }
+
+; SABA from ADD(ABS(SUB NSW))
+
+define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: saba_abs_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4s, v1.4s, v2.4s
+; CHECK-NEXT:    ret
+  %sub = sub nsw <4 x i32> %b, %c
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+  %add = add <4 x i32> %a, %abs
+  ret <4 x i32> %add
+}
+
+define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+; CHECK-LABEL: saba_abs_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.2s, v1.2s, v2.2s
+; CHECK-NEXT:    ret
+  %sub = sub nsw <2 x i32> %b, %c
+  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+  %add = add <2 x i32> %a, %abs
+  ret <2 x i32> %add
+}
+
+define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+; CHECK-LABEL: saba_abs_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:    ret
+  %sub = sub nsw <8 x i16> %b, %c
+  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+  %add = add <8 x i16> %a, %abs
+  ret <8 x i16> %add
+}
+
+define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+; CHECK-LABEL: saba_abs_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4h, v1.4h, v2.4h
+; CHECK-NEXT:    ret
+  %sub = sub nsw <4 x i16> %b, %c
+  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+  %add = add <4 x i16> %a, %abs
+  ret <4 x i16> %add
+}
+
+define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+; CHECK-LABEL: saba_abs_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+  %sub = sub nsw <16 x i8> %b, %c
+  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
+  %add = add <16 x i8> %a, %abs
+  ret <16 x i8> %add
+}
+
+define <8 x i8> @saba_abs_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+; CHECK-LABEL: saba_abs_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8b, v1.8b, v2.8b
+; CHECK-NEXT:    ret
+  %sub = sub nsw <8 x i8> %b, %c
+  %abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %sub, i1 true)
+  %add = add <8 x i8> %a, %abs
+  ret <8 x i8> %add
+}
+
+; SABA from ADD(SABD)
+
+define <4 x i32> @saba_sabd_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: saba_sabd_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4s, v1.4s, v2.4s
+; CHECK-NEXT:    ret
+  %sabd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %b, <4 x i32> %c)
+  %add = add <4 x i32> %sabd, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i32> @saba_sabd_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+; CHECK-LABEL: saba_sabd_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.2s, v1.2s, v2.2s
+; CHECK-NEXT:    ret
+  %sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
+  %add = add <2 x i32> %sabd, %a
+  ret <2 x i32> %add
+}
+
+define <8 x i16> @saba_sabd_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+; CHECK-LABEL: saba_sabd_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:    ret
+  %sabd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %b, <8 x i16> %c)
+  %add = add <8 x i16> %sabd, %a
+  ret <8 x i16> %add
+}
+
+define <4 x i16> @saba_sabd_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+; CHECK-LABEL: saba_sabd_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4h, v1.4h, v2.4h
+; CHECK-NEXT:    ret
+  %sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
+  %add = add <4 x i16> %sabd, %a
+  ret <4 x i16> %add
+}
+
+define <16 x i8> @saba_sabd_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+; CHECK-LABEL: saba_sabd_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+  %sabd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> %c)
+  %add = add <16 x i8> %sabd, %a
+  ret <16 x i8> %add
+}
+
+define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+; CHECK-LABEL: saba_sabd_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8b, v1.8b, v2.8b
+; CHECK-NEXT:    ret
+  %sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
+  %add = add <8 x i8> %sabd, %a
+  ret <8 x i8> %add
+}
+
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+
+declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
+declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
+declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
Index: llvm/test/CodeGen/AArch64/sve-saba.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-saba.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+attributes #0 = { "target-features"="+neon,+sve,+sve2" }
+
+; SABA from ADD(ABS(SUB NSW))
+
+define <vscale x 2 x i64> @saba_abs_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: saba_abs_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 2 x i64> %b, %c
+  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
+  %add = add <vscale x 2 x i64> %a, %abs
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 4 x i32> @saba_abs_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: saba_abs_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 4 x i32> %b, %c
+  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+  %add = add <vscale x 4 x i32> %a, %abs
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 8 x i16> @saba_abs_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: saba_abs_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 8 x i16> %b, %c
+  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
+  %add = add <vscale x 8 x i16> %a, %abs
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 16 x i8> @saba_abs_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: saba_abs_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 16 x i8> %b, %c
+  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
+  %add = add <vscale x 16 x i8> %a, %abs
+  ret <vscale x 16 x i8> %add
+}
+
+; SABA from ADD(SABD)
+
+define <vscale x 2 x i64> @saba_sabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: saba_sabd_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %sabd = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %true, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  %add = add <vscale x 2 x i64> %sabd, %a
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 4 x i32> @saba_sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: saba_sabd_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %sabd = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %true, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  %add = add <vscale x 4 x i32> %sabd, %a
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 8 x i16> @saba_sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: saba_sabd_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %sabd = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %true, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  %add = add <vscale x 8 x i16> %sabd, %a
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 16 x i8> @saba_sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: saba_sabd_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %true = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %sabd = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %true, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  %add = add <vscale x 16 x i8> %sabd, %a
+  ret <vscale x 16 x i8> %add
+}
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
+declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
+
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)