diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9505,18 +9505,27 @@ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) return SDValue(); + EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); - // Check if the operands are of same type and valid size. unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; - if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) - return SDValue(); - Op0 = Op0.getOperand(0); - Op1 = Op1.getOperand(0); - SDValue ABD = - DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD); + // fold abs(sext(x) - sext(y)) -> zext(abds(x, y)) + // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y)) + // NOTE: Extensions must be equivalent. + if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) { + Op0 = Op0.getOperand(0); + Op1 = Op1.getOperand(0); + SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD); + } + + // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y)) + // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y)) + if (TLI.isOperationLegalOrCustom(ABDOpcode, VT)) + return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1); + + return SDValue(); } SDValue DAGCombiner::visitABS(SDNode *N) { diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll --- a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -53,8 +53,7 @@ ; CHECK-NEXT: shl v1.4h, v1.4h, #8 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-NEXT: sshr v1.4h, v1.4h, #8 -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: abs v0.4h, v0.4h +; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %a.sext = sext <4 x i8> %a to <4 x i16> %b.sext = sext <4 x i8> %b to <4 x i16> @@ -108,8 +107,7 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: abs v0.2s, v0.2s +; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %a.sext = sext <2 x i16> %a to <2 x i32> %b.sext = sext <2 x i16> %b to <2 x i32> @@ -234,8 +232,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: bic v0.4h, #255, lsl #8 ; CHECK-NEXT: bic v1.4h, #255, lsl #8 -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: abs v0.4h, v0.4h +; CHECK-NEXT: uabd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %a.zext = zext <4 x i8> %a to <4 x i16> %b.zext = zext <4 x i8> %b to <4 x i16> @@ -288,8 +285,7 @@ ; CHECK-NEXT: movi d2, #0x00ffff0000ffff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: abs v0.2s, v0.2s +; CHECK-NEXT: uabd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %a.zext = zext <2 x i16> %a to <2 x i32> %b.zext = zext <2 x i16> %b to <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll --- a/llvm/test/CodeGen/AArch64/sve-abd.ll +++ b/llvm/test/CodeGen/AArch64/sve-abd.ll @@ -24,11 +24,10 @@ define @sabd_b_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: sabd_b_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ptrue p2.b -; CHECK-NEXT: sub z0.b, z0.b, z1.b -; CHECK-NEXT: abs z0.b, p2/m, z0.b +; CHECK-NEXT: sabd z0.b, p2/m, z0.b, z1.b ; CHECK-NEXT: ret %a.sext = sext %a to %b.sext = sext %b to @@ -57,8 +56,7 @@ ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h ; CHECK-NEXT: sxtb z1.h, p0/m, z1.h -; CHECK-NEXT: sub z0.h, z0.h, z1.h -; CHECK-NEXT: abs z0.h, p0/m, z0.h +; CHECK-NEXT: sabd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %a.sext = sext %a to %b.sext = sext %b to @@ -87,8 +85,7 @@ ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sxth z0.s, p0/m, z0.s ; CHECK-NEXT: sxth z1.s, p0/m, z1.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s -; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %a.sext = sext %a to %b.sext = sext %b to @@ -117,8 +114,7 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sxtw z0.d, p0/m, z0.d ; CHECK-NEXT: sxtw z1.d, p0/m, z1.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d -; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: sabd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %a.sext = sext %a to %b.sext = sext %b to @@ -148,11 +144,10 @@ define @uabd_b_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_b_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p2.b -; CHECK-NEXT: add z0.b, z0.b, z1.b -; CHECK-NEXT: abs z0.b, p2/m, z0.b +; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1 +; CHECK-NEXT: uabd z0.b, p2/m, z0.b, z1.b ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -178,11 +173,10 @@ define @uabd_h_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_h_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: sub z0.h, z0.h, z1.h -; CHECK-NEXT: abs z0.h, p0/m, z0.h +; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -208,11 +202,10 @@ define @uabd_s_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_s_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: and z1.s, z1.s, #0xffff -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s -; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -238,11 +231,10 @@ define @uabd_d_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_d_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d -; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -251,6 +243,66 @@ ret %abs } +; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and +; %b have differing types. +define @uabd_non_matching_extension( %a, %b) #0 { +; CHECK-LABEL: uabd_non_matching_extension: +; CHECK: // %bb.0: +; CHECK-NEXT: and z1.s, z1.s, #0xff +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: sub z1.d, z2.d, z3.d +; CHECK-NEXT: abs z1.d, p0/m, z1.d +; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i64( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +; Test the situation where isLegal(ISD::ABD, typeof(%a.zext)) returns true but +; %a and %b have differing types. +define @uabd_non_matching_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: uabd_non_matching_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i32( %sub, i1 true) + ret %abs +} + +; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and +; %b are promoted differently. +define @uabd_non_matching_promotion( %a, %b) #0 { +; CHECK-LABEL: uabd_non_matching_promotion: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: sxtb z1.s, p0/m, z1.s +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = sext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i32( %sub, i1 true) + ret %abs +} + declare @llvm.abs.nxv16i8(, i1) declare @llvm.abs.nxv8i16(, i1) diff --git a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll --- a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll @@ -21,8 +21,7 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vsub.i16 q0, q0, q1 -; CHECK-NEXT: vabs.s16 q0, q0 +; CHECK-NEXT: vabd.s16 q0, q0, q1 ; CHECK-NEXT: bx lr %sextsrc1 = sext <8 x i8> %src1 to <8 x i16> %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> @@ -74,8 +73,7 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vsub.i32 q0, q0, q1 -; CHECK-NEXT: vabs.s32 q0, q0 +; CHECK-NEXT: vabd.s32 q0, q0, q1 ; CHECK-NEXT: bx lr %sextsrc1 = sext <4 x i16> %src1 to <4 x i32> %sextsrc2 = sext <4 x i16> %src2 to <4 x i32> @@ -158,8 +156,7 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.u8 q1, q1 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vsub.i16 q0, q0, q1 -; CHECK-NEXT: vabs.s16 q0, q0 +; CHECK-NEXT: vabd.u16 q0, q0, q1 ; CHECK-NEXT: bx lr %zextsrc1 = zext <8 x i8> %src1 to <8 x i16> %zextsrc2 = zext <8 x i8> %src2 to <8 x i16> @@ -210,8 +207,7 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.u16 q1, q1 ; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vsub.i32 q0, q0, q1 -; CHECK-NEXT: vabs.s32 q0, q0 +; CHECK-NEXT: vabd.u32 q0, q0, q1 ; CHECK-NEXT: bx lr %zextsrc1 = zext <4 x i16> %src1 to <4 x i32> %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>