diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1090,6 +1090,7 @@ SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const; + SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1302,12 +1302,10 @@ setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); - if (Subtarget->hasSVE2()) { - setOperationAction(ISD::AVGFLOORS, VT, Custom); - setOperationAction(ISD::AVGFLOORU, VT, Custom); - setOperationAction(ISD::AVGCEILS, VT, Custom); - setOperationAction(ISD::AVGCEILU, VT, Custom); - } + setOperationAction(ISD::AVGFLOORS, VT, Custom); + setOperationAction(ISD::AVGFLOORU, VT, Custom); + setOperationAction(ISD::AVGCEILS, VT, Custom); + setOperationAction(ISD::AVGCEILU, VT, Custom); } // Illegal unpacked integer vector types. 
@@ -5977,13 +5975,13 @@ case ISD::ABDU: return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED); case ISD::AVGFLOORS: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDS_PRED); + return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED); case ISD::AVGFLOORU: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDU_PRED); + return LowerAVG(Op, DAG, AArch64ISD::HADDU_PRED); case ISD::AVGCEILS: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDS_PRED); + return LowerAVG(Op, DAG, AArch64ISD::RHADDS_PRED); case ISD::AVGCEILU: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDU_PRED); + return LowerAVG(Op, DAG, AArch64ISD::RHADDU_PRED); case ISD::BITREVERSE: return LowerBitreverse(Op, DAG); case ISD::BSWAP: @@ -13244,6 +13242,57 @@ return Chain; } +// With SVE2, emit the predicated NewOp. Else, if x and y are extended, lower: +// avgfloor(x, y) -> (x + y) >> 1 +// avgceil(x, y) -> (x + y + 1) >> 1 +// +// Otherwise, lower to: +// avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1) +// avgceil(x, y) -> (x >> 1) + (y >> 1) + ((x | y) & 1) +SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG, + unsigned NewOp) const { + if (Subtarget->hasSVE2()) + return LowerToPredicatedOp(Op, DAG, NewOp); + + SDLoc dl(Op); + SDValue OpA = Op->getOperand(0); + SDValue OpB = Op->getOperand(1); + EVT VT = Op.getValueType(); + bool IsCeil = + (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU); + bool IsSigned = + (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS); + unsigned ShiftOpc = IsSigned ? 
ISD::SRA : ISD::SRL; + + assert(VT.isScalableVector() && "Only expect to lower scalable vector op!"); + + auto IsZeroExtended = [&DAG](SDValue &Node) { + KnownBits Known = DAG.computeKnownBits(Node, 0); + return Known.Zero.isSignBitSet(); + }; + + auto IsSignExtended = [&DAG](SDValue &Node) { + return (DAG.ComputeNumSignBits(Node, 0) > 1); + }; + + SDValue ConstantOne = DAG.getConstant(1, dl, VT); + if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) || + (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) { + SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB); + if (IsCeil) + Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne); + return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne); + } + + SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne); + SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne); + + SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB); + tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne); + SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB); + return DAG.getNode(ISD::ADD, dl, VT, Add, tmp); +} + SDValue AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll @@ -0,0 +1,1295 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve2 | FileCheck %s -check-prefixes=CHECK,SVE2 + +define @hadds_v2i64( %s0, %s1) { +; SVE-LABEL: hadds_v2i64: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.d, z1.d, #1 +; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.d, z3.d, z2.d +; SVE-NEXT: and z0.d, z0.d, #0x1 +; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: ret +; 
+; SVE2-LABEL: hadds_v2i64: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v2i64_lsh( %s0, %s1) { +; SVE-LABEL: hadds_v2i64_lsh: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.d, z1.d, #1 +; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.d, z3.d, z2.d +; SVE-NEXT: and z0.d, z0.d, #0x1 +; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v2i64_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v2i64( %s0, %s1) { +; SVE-LABEL: haddu_v2i64: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.d, z1.d, #1 +; SVE-NEXT: lsr z3.d, z0.d, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.d, z3.d, z2.d +; SVE-NEXT: and z0.d, z0.d, #0x1 +; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v2i64: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v2i32( %s0, %s1) { +; SVE-LABEL: hadds_v2i32: +; SVE: // %bb.0: // %entry +; SVE-NEXT: ptrue p0.d +; SVE-NEXT: sxtw z0.d, p0/m, z0.d +; SVE-NEXT: adr z0.d, [z0.d, z1.d, sxtw] +; SVE-NEXT: asr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v2i32: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: 
ptrue p0.d +; SVE2-NEXT: sxtw z0.d, p0/m, z0.d +; SVE2-NEXT: sxtw z1.d, p0/m, z1.d +; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v2i32_lsh( %s0, %s1) { +; CHECK-LABEL: hadds_v2i32_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, sxtw] +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v2i32( %s0, %s1) { +; SVE-LABEL: haddu_v2i32: +; SVE: // %bb.0: // %entry +; SVE-NEXT: and z0.d, z0.d, #0xffffffff +; SVE-NEXT: adr z0.d, [z0.d, z1.d, uxtw] +; SVE-NEXT: lsr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v2i32: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: and z0.d, z0.d, #0xffffffff +; SVE2-NEXT: and z1.d, z1.d, #0xffffffff +; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i32( %s0, %s1) { +; SVE-LABEL: hadds_v4i32: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.s, z1.s, #1 +; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.s, z3.s, z2.s +; SVE-NEXT: and z0.s, z0.s, #0x1 +; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v4i32: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw 
%s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i32_lsh( %s0, %s1) { +; SVE-LABEL: hadds_v4i32_lsh: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.s, z1.s, #1 +; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.s, z3.s, z2.s +; SVE-NEXT: and z0.s, z0.s, #0x1 +; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v4i32_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v4i32( %s0, %s1) { +; SVE-LABEL: haddu_v4i32: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.s, z1.s, #1 +; SVE-NEXT: lsr z3.s, z0.s, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.s, z3.s, z2.s +; SVE-NEXT: and z0.s, z0.s, #0x1 +; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v4i32: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v2i16( %s0, %s1) { +; SVE-LABEL: hadds_v2i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: ptrue p0.d +; SVE-NEXT: sxth z0.d, p0/m, z0.d +; SVE-NEXT: sxth z1.d, p0/m, z1.d +; SVE-NEXT: add z0.d, z0.d, z1.d +; SVE-NEXT: asr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v2i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: sxth z0.d, p0/m, z0.d +; SVE2-NEXT: sxth z1.d, p0/m, z1.d +; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = 
sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v2i16_lsh( %s0, %s1) { +; CHECK-LABEL: hadds_v2i16_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: sxth z1.d, p0/m, z1.d +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v2i16( %s0, %s1) { +; SVE-LABEL: haddu_v2i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: and z0.d, z0.d, #0xffff +; SVE-NEXT: and z1.d, z1.d, #0xffff +; SVE-NEXT: add z0.d, z0.d, z1.d +; SVE-NEXT: lsr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v2i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: and z0.d, z0.d, #0xffff +; SVE2-NEXT: and z1.d, z1.d, #0xffff +; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i16( %s0, %s1) { +; SVE-LABEL: hadds_v4i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: ptrue p0.s +; SVE-NEXT: sxth z0.s, p0/m, z0.s +; SVE-NEXT: sxth z1.s, p0/m, z1.s +; SVE-NEXT: add z0.s, z0.s, z1.s +; SVE-NEXT: asr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v4i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: sxth z0.s, p0/m, z0.s +; SVE2-NEXT: sxth z1.s, p0/m, z1.s +; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( 
poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i16_lsh( %s0, %s1) { +; CHECK-LABEL: hadds_v4i16_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: sxth z1.s, p0/m, z1.s +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v4i16( %s0, %s1) { +; SVE-LABEL: haddu_v4i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: and z0.s, z0.s, #0xffff +; SVE-NEXT: and z1.s, z1.s, #0xffff +; SVE-NEXT: add z0.s, z0.s, z1.s +; SVE-NEXT: lsr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v4i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: and z0.s, z0.s, #0xffff +; SVE2-NEXT: and z1.s, z1.s, #0xffff +; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v8i16( %s0, %s1) { +; SVE-LABEL: hadds_v8i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.h, z1.h, #1 +; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.h, z3.h, z2.h +; SVE-NEXT: and z0.h, z0.h, #0x1 +; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v8i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v8i16_lsh( %s0, %s1) { +; SVE-LABEL: hadds_v8i16_lsh: +; SVE: // 
%bb.0: // %entry +; SVE-NEXT: asr z2.h, z1.h, #1 +; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.h, z3.h, z2.h +; SVE-NEXT: and z0.h, z0.h, #0x1 +; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v8i16_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v8i16( %s0, %s1) { +; SVE-LABEL: haddu_v8i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.h, z1.h, #1 +; SVE-NEXT: lsr z3.h, z0.h, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.h, z3.h, z2.h +; SVE-NEXT: and z0.h, z0.h, #0x1 +; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v8i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i8( %s0, %s1) { +; SVE-LABEL: hadds_v4i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: ptrue p0.s +; SVE-NEXT: sxtb z0.s, p0/m, z0.s +; SVE-NEXT: sxtb z1.s, p0/m, z1.s +; SVE-NEXT: add z0.s, z0.s, z1.s +; SVE-NEXT: asr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v4i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: sxtb z0.s, p0/m, z0.s +; SVE2-NEXT: sxtb z1.s, p0/m, z1.s +; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v4i8_lsh( %s0, %s1) { +; CHECK-LABEL: 
hadds_v4i8_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: sxtb z1.s, p0/m, z1.s +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v4i8( %s0, %s1) { +; SVE-LABEL: haddu_v4i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: and z0.s, z0.s, #0xff +; SVE-NEXT: and z1.s, z1.s, #0xff +; SVE-NEXT: add z0.s, z0.s, z1.s +; SVE-NEXT: lsr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v4i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: and z0.s, z0.s, #0xff +; SVE2-NEXT: and z1.s, z1.s, #0xff +; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v8i8( %s0, %s1) { +; SVE-LABEL: hadds_v8i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: ptrue p0.h +; SVE-NEXT: sxtb z0.h, p0/m, z0.h +; SVE-NEXT: sxtb z1.h, p0/m, z1.h +; SVE-NEXT: add z0.h, z0.h, z1.h +; SVE-NEXT: asr z0.h, z0.h, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v8i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: sxtb z0.h, p0/m, z0.h +; SVE2-NEXT: sxtb z1.h, p0/m, z1.h +; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v8i8_lsh( %s0, %s1) { +; CHECK-LABEL: hadds_v8i8_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h 
+; CHECK-NEXT: sxtb z1.h, p0/m, z1.h +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v8i8( %s0, %s1) { +; SVE-LABEL: haddu_v8i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: and z0.h, z0.h, #0xff +; SVE-NEXT: and z1.h, z1.h, #0xff +; SVE-NEXT: add z0.h, z0.h, z1.h +; SVE-NEXT: lsr z0.h, z0.h, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v8i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: and z0.h, z0.h, #0xff +; SVE2-NEXT: and z1.h, z1.h, #0xff +; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v16i8( %s0, %s1) { +; SVE-LABEL: hadds_v16i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.b, z1.b, #1 +; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.b, z3.b, z2.b +; SVE-NEXT: and z0.b, z0.b, #0x1 +; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v16i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.b +; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = ashr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @hadds_v16i8_lsh( %s0, %s1) { +; SVE-LABEL: hadds_v16i8_lsh: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.b, z1.b, #1 +; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.b, z3.b, z2.b +; SVE-NEXT: and z0.b, z0.b, #0x1 +; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: ret +; +; SVE2-LABEL: 
hadds_v16i8_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.b +; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %m = add nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @haddu_v16i8( %s0, %s1) { +; SVE-LABEL: haddu_v16i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.b, z1.b, #1 +; SVE-NEXT: lsr z3.b, z0.b, #1 +; SVE-NEXT: and z0.d, z0.d, z1.d +; SVE-NEXT: add z1.b, z3.b, z2.b +; SVE-NEXT: and z0.b, z0.b, #0x1 +; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v16i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.b +; SVE2-NEXT: uhadd z0.b, p0/m, z0.b, z1.b +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %m = add nuw nsw %s0s, %s1s + %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %s2 = trunc %s to + ret %s2 +} + +define @rhadds_v2i64( %s0, %s1) { +; SVE-LABEL: rhadds_v2i64: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.d, z1.d, #1 +; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.d, z3.d, z2.d +; SVE-NEXT: and z0.d, z0.d, #0x1 +; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v2i64: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v2i64_lsh( %s0, %s1) { +; SVE-LABEL: rhadds_v2i64_lsh: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.d, z1.d, #1 +; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.d, 
z3.d, z2.d +; SVE-NEXT: and z0.d, z0.d, #0x1 +; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v2i64_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v2i64( %s0, %s1) { +; SVE-LABEL: rhaddu_v2i64: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.d, z1.d, #1 +; SVE-NEXT: lsr z3.d, z0.d, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.d, z3.d, z2.d +; SVE-NEXT: and z0.d, z0.d, #0x1 +; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v2i64: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v2i32( %s0, %s1) { +; CHECK-LABEL: rhadds_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: sxtw z1.d, p0/m, z1.d +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: asr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, 
zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v2i32_lsh( %s0, %s1) { +; CHECK-LABEL: rhadds_v2i32_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: sxtw z1.d, p0/m, z1.d +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v2i32( %s0, %s1) { +; SVE-LABEL: rhaddu_v2i32: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; SVE-NEXT: and z0.d, z0.d, #0xffffffff +; SVE-NEXT: and z1.d, z1.d, #0xffffffff +; SVE-NEXT: eor z0.d, z0.d, z2.d +; SVE-NEXT: sub z0.d, z1.d, z0.d +; SVE-NEXT: lsr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v2i32: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: and z0.d, z0.d, #0xffffffff +; SVE2-NEXT: and z1.d, z1.d, #0xffffffff +; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i32( %s0, %s1) { +; SVE-LABEL: rhadds_v4i32: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.s, z1.s, #1 +; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.s, z3.s, z2.s +; SVE-NEXT: and z0.s, z0.s, #0x1 +; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v4i32: 
+; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i32_lsh( %s0, %s1) { +; SVE-LABEL: rhadds_v4i32_lsh: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.s, z1.s, #1 +; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.s, z3.s, z2.s +; SVE-NEXT: and z0.s, z0.s, #0x1 +; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v4i32_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v4i32( %s0, %s1) { +; SVE-LABEL: rhaddu_v4i32: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.s, z1.s, #1 +; SVE-NEXT: lsr z3.s, z0.s, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.s, z3.s, z2.s +; SVE-NEXT: and z0.s, z0.s, #0x1 +; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v4i32: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result 
+} + +define @rhadds_v2i16( %s0, %s1) { +; CHECK-LABEL: rhadds_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: sxth z1.d, p0/m, z1.d +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: asr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v2i16_lsh( %s0, %s1) { +; CHECK-LABEL: rhadds_v2i16_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: sxth z1.d, p0/m, z1.d +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v2i16( %s0, %s1) { +; CHECK-LABEL: rhaddu_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: and z0.d, z0.d, #0xffff +; CHECK-NEXT: and z1.d, z1.d, #0xffff +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.d, z1.d, z0.d +; CHECK-NEXT: lsr z0.d, z0.d, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, 
shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i16( %s0, %s1) { +; CHECK-LABEL: rhadds_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: sxth z1.s, p0/m, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: asr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i16_lsh( %s0, %s1) { +; CHECK-LABEL: rhadds_v4i16_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: sxth z1.s, p0/m, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v4i16( %s0, %s1) { +; SVE-LABEL: rhaddu_v4i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; SVE-NEXT: and z0.s, z0.s, #0xffff +; SVE-NEXT: and z1.s, z1.s, #0xffff +; SVE-NEXT: eor z0.d, z0.d, z2.d +; SVE-NEXT: sub z0.s, z1.s, z0.s +; SVE-NEXT: lsr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v4i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: and z0.s, z0.s, #0xffff +; SVE2-NEXT: and z1.s, z1.s, 
#0xffff +; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v8i16( %s0, %s1) { +; SVE-LABEL: rhadds_v8i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.h, z1.h, #1 +; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.h, z3.h, z2.h +; SVE-NEXT: and z0.h, z0.h, #0x1 +; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v8i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v8i16_lsh( %s0, %s1) { +; SVE-LABEL: rhadds_v8i16_lsh: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.h, z1.h, #1 +; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.h, z3.h, z2.h +; SVE-NEXT: and z0.h, z0.h, #0x1 +; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v8i16_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v8i16( %s0, %s1) { +; 
SVE-LABEL: rhaddu_v8i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.h, z1.h, #1 +; SVE-NEXT: lsr z3.h, z0.h, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.h, z3.h, z2.h +; SVE-NEXT: and z0.h, z0.h, #0x1 +; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v8i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i8( %s0, %s1) { +; CHECK-LABEL: rhadds_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: sxtb z1.s, p0/m, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: asr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v4i8_lsh( %s0, %s1) { +; CHECK-LABEL: rhadds_v4i8_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: sxtb z1.s, p0/m, z1.s +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, 
zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v4i8( %s0, %s1) { +; CHECK-LABEL: rhaddu_v4i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: and z1.s, z1.s, #0xff +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.s, z1.s, z0.s +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v8i8( %s0, %s1) { +; CHECK-LABEL: rhadds_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: sxtb z1.h, p0/m, z1.h +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.h, z1.h, z0.h +; CHECK-NEXT: asr z0.h, z0.h, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v8i8_lsh( %s0, %s1) { +; CHECK-LABEL: rhadds_v8i8_lsh: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: sxtb z1.h, p0/m, z1.h +; CHECK-NEXT: eor z0.d, z0.d, z2.d +; CHECK-NEXT: sub z0.h, z1.h, z0.h +; CHECK-NEXT: lsr z0.h, z0.h, #1 +; CHECK-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, 
shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v8i8( %s0, %s1) { +; SVE-LABEL: rhaddu_v8i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff +; SVE-NEXT: and z0.h, z0.h, #0xff +; SVE-NEXT: and z1.h, z1.h, #0xff +; SVE-NEXT: eor z0.d, z0.d, z2.d +; SVE-NEXT: sub z0.h, z1.h, z0.h +; SVE-NEXT: lsr z0.h, z0.h, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v8i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.h +; SVE2-NEXT: and z0.h, z0.h, #0xff +; SVE2-NEXT: and z1.h, z1.h, #0xff +; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v16i8( %s0, %s1) { +; SVE-LABEL: rhadds_v16i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: asr z2.b, z1.b, #1 +; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.b, z3.b, z2.b +; SVE-NEXT: and z0.b, z0.b, #0x1 +; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v16i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.b +; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = ashr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhadds_v16i8_lsh( %s0, %s1) { +; SVE-LABEL: rhadds_v16i8_lsh: +; SVE: // %bb.0: // %entry +; 
SVE-NEXT: asr z2.b, z1.b, #1 +; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.b, z3.b, z2.b +; SVE-NEXT: and z0.b, z0.b, #0x1 +; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: ret +; +; SVE2-LABEL: rhadds_v16i8_lsh: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.b +; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b +; SVE2-NEXT: ret +entry: + %s0s = sext %s0 to + %s1s = sext %s1 to + %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} + +define @rhaddu_v16i8( %s0, %s1) { +; SVE-LABEL: rhaddu_v16i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: lsr z2.b, z1.b, #1 +; SVE-NEXT: lsr z3.b, z0.b, #1 +; SVE-NEXT: orr z0.d, z0.d, z1.d +; SVE-NEXT: add z1.b, z3.b, z2.b +; SVE-NEXT: and z0.b, z0.b, #0x1 +; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v16i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.b +; SVE2-NEXT: urhadd z0.b, p0/m, z0.b, z1.b +; SVE2-NEXT: ret +entry: + %s0s = zext %s0 to + %s1s = zext %s1 to + %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %add2 = add nuw nsw %add, %s1s + %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) + %result = trunc %s to + ret %result +} diff --git a/llvm/test/CodeGen/AArch64/sve2-hadd.ll b/llvm/test/CodeGen/AArch64/sve2-hadd.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve2-hadd.ll +++ /dev/null @@ -1,627 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple aarch64-none-eabi -mattr=+sve2 -o - | FileCheck %s - -define @hadds_v2i64( %s0, %s1) { -; CHECK-LABEL: hadds_v2i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: shadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: 
ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v2i64( %s0, %s1) { -; CHECK-LABEL: haddu_v2i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v2i32( %s0, %s1) { -; CHECK-LABEL: hadds_v2i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d -; CHECK-NEXT: adr z0.d, [z0.d, z1.d, sxtw] -; CHECK-NEXT: lsr z0.d, z0.d, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v2i32( %s0, %s1) { -; CHECK-LABEL: haddu_v2i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff -; CHECK-NEXT: and z1.d, z1.d, #0xffffffff -; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v4i32( %s0, %s1) { -; CHECK-LABEL: hadds_v4i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: shadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v4i32( %s0, %s1) { -; CHECK-LABEL: haddu_v4i32: -; 
CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v2i16( %s0, %s1) { -; CHECK-LABEL: hadds_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d -; CHECK-NEXT: sxth z1.d, p0/m, z1.d -; CHECK-NEXT: add z0.d, z0.d, z1.d -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff -; CHECK-NEXT: lsr z0.d, z0.d, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v2i16( %s0, %s1) { -; CHECK-LABEL: haddu_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: and z0.d, z0.d, #0xffff -; CHECK-NEXT: and z1.d, z1.d, #0xffff -; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v4i16( %s0, %s1) { -; CHECK-LABEL: hadds_v4i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxth z0.s, p0/m, z0.s -; CHECK-NEXT: sxth z1.s, p0/m, z1.s -; CHECK-NEXT: add z0.s, z0.s, z1.s -; CHECK-NEXT: lsr z0.s, z0.s, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v4i16( %s0, %s1) { -; CHECK-LABEL: haddu_v4i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: and z0.s, z0.s, #0xffff -; CHECK-NEXT: and 
z1.s, z1.s, #0xffff -; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v8i16( %s0, %s1) { -; CHECK-LABEL: hadds_v8i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: shadd z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v8i16( %s0, %s1) { -; CHECK-LABEL: haddu_v8i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: uhadd z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v4i8( %s0, %s1) { -; CHECK-LABEL: hadds_v4i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxtb z0.s, p0/m, z0.s -; CHECK-NEXT: sxtb z1.s, p0/m, z1.s -; CHECK-NEXT: add z0.s, z0.s, z1.s -; CHECK-NEXT: and z0.s, z0.s, #0xffff -; CHECK-NEXT: lsr z0.s, z0.s, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v4i8( %s0, %s1) { -; CHECK-LABEL: haddu_v4i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: and z0.s, z0.s, #0xff -; CHECK-NEXT: and z1.s, z1.s, #0xff -; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, 
i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v8i8( %s0, %s1) { -; CHECK-LABEL: hadds_v8i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: sxtb z0.h, p0/m, z0.h -; CHECK-NEXT: sxtb z1.h, p0/m, z1.h -; CHECK-NEXT: add z0.h, z0.h, z1.h -; CHECK-NEXT: lsr z0.h, z0.h, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v8i8( %s0, %s1) { -; CHECK-LABEL: haddu_v8i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: and z0.h, z0.h, #0xff -; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: uhadd z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @hadds_v16i8( %s0, %s1) { -; CHECK-LABEL: hadds_v16i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: shadd z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %m = add nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @haddu_v16i8( %s0, %s1) { -; CHECK-LABEL: haddu_v16i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: uhadd z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %m = add nuw nsw %s0s, %s1s - %s = lshr %m, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %s2 = trunc %s to - ret %s2 -} - -define @rhadds_v2i64( %s0, %s1) { -; CHECK-LABEL: rhadds_v2i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: srhadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret -entry: - 
%s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v2i64( %s0, %s1) { -; CHECK-LABEL: rhaddu_v2i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: urhadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i128 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v2i32( %s0, %s1) { -; CHECK-LABEL: rhadds_v2i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sxtw z0.d, p0/m, z0.d -; CHECK-NEXT: sxtw z1.d, p0/m, z1.d -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.d, z1.d, z0.d -; CHECK-NEXT: lsr z0.d, z0.d, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v2i32( %s0, %s1) { -; CHECK-LABEL: rhaddu_v2i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff -; CHECK-NEXT: and z1.d, z1.d, #0xffffffff -; CHECK-NEXT: urhadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, 
shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v4i32( %s0, %s1) { -; CHECK-LABEL: rhadds_v4i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: srhadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v4i32( %s0, %s1) { -; CHECK-LABEL: rhaddu_v4i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: urhadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v2i16( %s0, %s1) { -; CHECK-LABEL: rhadds_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sxth z0.d, p0/m, z0.d -; CHECK-NEXT: sxth z1.d, p0/m, z1.d -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.d, z1.d, z0.d -; CHECK-NEXT: and z0.d, z0.d, #0xffffffff -; CHECK-NEXT: lsr z0.d, z0.d, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v2i16( %s0, %s1) { -; CHECK-LABEL: rhaddu_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov z2.d, #-1 // 
=0xffffffffffffffff -; CHECK-NEXT: and z0.d, z0.d, #0xffff -; CHECK-NEXT: and z1.d, z1.d, #0xffff -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.d, z1.d, z0.d -; CHECK-NEXT: lsr z0.d, z0.d, #1 -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v4i16( %s0, %s1) { -; CHECK-LABEL: rhadds_v4i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sxth z0.s, p0/m, z0.s -; CHECK-NEXT: sxth z1.s, p0/m, z1.s -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.s, z1.s, z0.s -; CHECK-NEXT: lsr z0.s, z0.s, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v4i16( %s0, %s1) { -; CHECK-LABEL: rhaddu_v4i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: and z0.s, z0.s, #0xffff -; CHECK-NEXT: and z1.s, z1.s, #0xffff -; CHECK-NEXT: urhadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v8i16( %s0, %s1) { -; CHECK-LABEL: rhadds_v8i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: srhadd z0.h, p0/m, z0.h, 
z1.h -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v8i16( %s0, %s1) { -; CHECK-LABEL: rhaddu_v8i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: urhadd z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v4i8( %s0, %s1) { -; CHECK-LABEL: rhadds_v4i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sxtb z0.s, p0/m, z0.s -; CHECK-NEXT: sxtb z1.s, p0/m, z1.s -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.s, z1.s, z0.s -; CHECK-NEXT: and z0.s, z0.s, #0xffff -; CHECK-NEXT: lsr z0.s, z0.s, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v4i8( %s0, %s1) { -; CHECK-LABEL: rhaddu_v4i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z0.s, z0.s, #0xff -; CHECK-NEXT: and z1.s, z1.s, #0xff -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.s, z1.s, z0.s -; CHECK-NEXT: lsr z0.s, z0.s, #1 -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw 
%s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v8i8( %s0, %s1) { -; CHECK-LABEL: rhadds_v8i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sxtb z0.h, p0/m, z0.h -; CHECK-NEXT: sxtb z1.h, p0/m, z1.h -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.h, z1.h, z0.h -; CHECK-NEXT: lsr z0.h, z0.h, #1 -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhaddu_v8i8( %s0, %s1) { -; CHECK-LABEL: rhaddu_v8i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: and z0.h, z0.h, #0xff -; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: urhadd z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -} - -define @rhadds_v16i8( %s0, %s1) { -; CHECK-LABEL: rhadds_v16i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: srhadd z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret -entry: - %s0s = sext %s0 to - %s1s = sext %s1 to - %add = add %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %add2 = add %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - 
%result = trunc %s to - ret %result -} - -define @rhaddu_v16i8( %s0, %s1) { -; CHECK-LABEL: rhaddu_v16i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: urhadd z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret -entry: - %s0s = zext %s0 to - %s1s = zext %s1 to - %add = add nuw nsw %s0s, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %add2 = add nuw nsw %add, %s1s - %s = lshr %add2, shufflevector ( insertelement ( poison, i16 1, i32 0), poison, zeroinitializer) - %result = trunc %s to - ret %result -}