diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -226,6 +226,8 @@
   SMAXV,
   UMAXV,
 
+  SADDV_PRED,
+  UADDV_PRED,
   SMAXV_PRED,
   UMAXV_PRED,
   SMINV_PRED,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -116,6 +116,27 @@
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;
 
+static inline EVT getPackedSVEVectorVT(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    llvm_unreachable("unexpected element type for vector");
+  case MVT::i8:
+    return MVT::nxv16i8;
+  case MVT::i16:
+    return MVT::nxv8i16;
+  case MVT::i32:
+    return MVT::nxv4i32;
+  case MVT::i64:
+    return MVT::nxv2i64;
+  case MVT::f16:
+    return MVT::nxv8f16;
+  case MVT::f32:
+    return MVT::nxv4f32;
+  case MVT::f64:
+    return MVT::nxv2f64;
+  }
+}
+
 /// Returns true if VT's elements occupy the lowest bit positions of its
 /// associated register class without any intervening space.
 ///
@@ -1587,6 +1608,8 @@
     MAKE_CASE(AArch64ISD::UMINV)
     MAKE_CASE(AArch64ISD::SMAXV)
     MAKE_CASE(AArch64ISD::UMAXV)
+    MAKE_CASE(AArch64ISD::SADDV_PRED)
+    MAKE_CASE(AArch64ISD::UADDV_PRED)
     MAKE_CASE(AArch64ISD::SMAXV_PRED)
     MAKE_CASE(AArch64ISD::UMAXV_PRED)
     MAKE_CASE(AArch64ISD::SMINV_PRED)
@@ -12221,34 +12244,6 @@
                      DAG.getConstant(0, dl, MVT::i64));
 }
 
-static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc,
-                                    SelectionDAG &DAG) {
-  SDLoc dl(N);
-  LLVMContext &Ctx = *DAG.getContext();
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
-  EVT VT = N->getValueType(0);
-  SDValue Pred = N->getOperand(1);
-  SDValue Data = N->getOperand(2);
-  EVT DataVT = Data.getValueType();
-
-  if (DataVT.getVectorElementType().isScalarInteger() &&
-      (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)) {
-    if (!TLI.isTypeLegal(DataVT))
-      return SDValue();
-
-    EVT OutputVT = EVT::getVectorVT(Ctx, VT,
-      AArch64::NeonBitsPerVector / VT.getSizeInBits());
-    SDValue Reduce = DAG.getNode(Opc, dl, OutputVT, Pred, Data);
-    SDValue Zero = DAG.getConstant(0, dl, MVT::i64);
-    SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Reduce, Zero);
-
-    return Result;
-  }
-
-  return SDValue();
-}
-
 static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
   SDLoc DL(N);
   SDValue Op1 = N->getOperand(1);
@@ -12392,6 +12387,25 @@
   return DAG.getZExtOrTrunc(Res, DL, VT);
 }
 
+static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
+                                      SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  SDValue Pred = N->getOperand(1);
+  SDValue VecToReduce = N->getOperand(2);
+
+  // NOTE: The integer reduction's result type is not always linked to the
+  // operand's element type so we construct it from the intrinsic's result type.
+  EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
+  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
+
+  // SVE reductions set the whole vector register with the first element
+  // containing the reduction result, which we'll now extract.
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
+                     Zero);
+}
+
 static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
                                      SelectionDAG &DAG) {
   SDLoc DL(N);
@@ -12505,20 +12519,28 @@
   case Intrinsic::aarch64_crc32h:
   case Intrinsic::aarch64_crc32ch:
     return tryCombineCRC32(0xffff, N, DAG);
+  case Intrinsic::aarch64_sve_saddv:
+    // There is no i64 version of SADDV because the sign is irrelevant.
+    if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
+      return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
+    else
+      return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
+  case Intrinsic::aarch64_sve_uaddv:
+    return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
   case Intrinsic::aarch64_sve_smaxv:
-    return LowerSVEIntReduction(N, AArch64ISD::SMAXV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
   case Intrinsic::aarch64_sve_umaxv:
-    return LowerSVEIntReduction(N, AArch64ISD::UMAXV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
   case Intrinsic::aarch64_sve_sminv:
-    return LowerSVEIntReduction(N, AArch64ISD::SMINV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
   case Intrinsic::aarch64_sve_uminv:
-    return LowerSVEIntReduction(N, AArch64ISD::UMINV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
   case Intrinsic::aarch64_sve_orv:
-    return LowerSVEIntReduction(N, AArch64ISD::ORV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
   case Intrinsic::aarch64_sve_eorv:
-    return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
   case Intrinsic::aarch64_sve_andv:
-    return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
   case Intrinsic::aarch64_sve_index:
     return LowerSVEIntrinsicIndex(N, DAG);
   case Intrinsic::aarch64_sve_dup:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -152,6 +152,8 @@
 def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
 def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
 def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
+def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>;
+def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>;
 def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
 def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
 def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
@@ -307,8 +309,8 @@
   defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>;
 
   // SVE predicated integer reductions.
-  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
-  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>;
+  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>;
+  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>;
   defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>;
   defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>;
   defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -348,11 +348,6 @@
 : Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
       (inst $Op1, $Op2)>;
 
-class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1,
-                                  ValueType vt2, Instruction inst, SubRegIndex sub>
-: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
-      (INSERT_SUBREG (vtd (IMPLICIT_DEF)), (inst $Op1, $Op2), sub)>;
-
 class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                    ValueType vt2, ValueType vt3, Instruction inst>
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
       (inst $Op1, $Op2, $Op3)>;
@@ -4535,7 +4530,6 @@
   def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
-
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Accumulating Reduction Group
 //===----------------------------------------------------------------------===//
@@ -7173,8 +7167,8 @@
 //===----------------------------------------------------------------------===//
 
 class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
-                     ZPRRegOp zprty, RegisterClass regtype>
-: I<(outs regtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+                     ZPRRegOp zprty, FPRasZPROperand dstOpType>
+: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
   asm, "\t$Vd, $Pg, $Zn",
   "",
   []>, Sched<[]> {
@@ -7192,51 +7186,54 @@
   let Inst{4-0} = Vd;
 }
 
-multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
-  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
-  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
-
-  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
-  def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
-}
-
-multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, SDPatternOperator op, SDPatternOperator opSaddv> {
-  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
-  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
-  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
-  def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
-
-  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
-  def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
-  def : SVE_2_Op_Pat<i64, opSaddv, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
-}
-
-multiclass sve_int_reduce_1<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
-  def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
-  def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
-  def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
+  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
+  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
+  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
 
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def :
SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; } -multiclass sve_int_reduce_2 opc, string asm, SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>; - def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>; - def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>; - def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>; - - def : SVE_2_Op_Pat_Reduce_To_Neon(NAME # _B), bsub>; - def : SVE_2_Op_Pat_Reduce_To_Neon(NAME # _H), hsub>; - def : SVE_2_Op_Pat_Reduce_To_Neon(NAME # _S), ssub>; - def : SVE_2_Op_Pat_Reduce_To_Neon(NAME # _D), dsub>; +multiclass sve_int_reduce_0_uaddv opc, string asm, + SDPatternOperator op> { + def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>; + def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>; + def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>; + def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>; + + def : SVE_2_Op_Pat(NAME # _B)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; +} + +multiclass sve_int_reduce_1 opc, string asm, + SDPatternOperator op> { + def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>; + def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>; + def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32asZPR>; + def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>; + + def : SVE_2_Op_Pat(NAME # _B)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; +} + +multiclass sve_int_reduce_2 opc, string asm, + SDPatternOperator op> { + def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>; + def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>; + def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>; + def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>; + + def : SVE_2_Op_Pat(NAME # _B)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } class sve_int_movprfx_pred sz8_32, bits<3> opc, string asm, diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll --- a/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t @@ -6,9 +7,10 @@ define i64 @saddv_i8( %pg, %a) { ; CHECK-LABEL: saddv_i8: -; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: saddv d0, p0, z0.b +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( %pg, %a) ret i64 %out @@ -16,9 +18,10 @@ define i64 @saddv_i16( %pg, %a) { ; CHECK-LABEL: saddv_i16: -; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: saddv d0, p0, z0.h +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16( %pg, %a) ret i64 %out @@ -27,19 +30,21 @@ define i64 @saddv_i32( %pg, %a) { ; CHECK-LABEL: saddv_i32: -; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: saddv d0, p0, z0.s +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 
@llvm.aarch64.sve.saddv.nxv4i32( %pg, %a) ret i64 %out } define i64 @saddv_i64( %pg, %a) { -; CHECK-LABEL: saddv_i64 -; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK-LABEL: saddv_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uaddv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64( %pg, %a) ret i64 %out @@ -47,9 +52,10 @@ define i64 @uaddv_i8( %pg, %a) { ; CHECK-LABEL: uaddv_i8: -; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uaddv d0, p0, z0.b +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( %pg, %a) ret i64 %out @@ -57,9 +63,10 @@ define i64 @uaddv_i16( %pg, %a) { ; CHECK-LABEL: uaddv_i16: -; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uaddv d0, p0, z0.h +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( %pg, %a) ret i64 %out @@ -68,9 +75,10 @@ define i64 @uaddv_i32( %pg, %a) { ; CHECK-LABEL: uaddv_i32: -; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uaddv d0, p0, z0.s +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( %pg, %a) ret i64 %out @@ -78,9 +86,10 @@ define i64 @uaddv_i64( %pg, %a) { ; CHECK-LABEL: uaddv_i64: -; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uaddv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( %pg, %a) ret i64 %out @@ -88,9 +97,10 @@ define i8 @smaxv_i8( %pg, %a) { ; CHECK-LABEL: smaxv_i8: -; CHECK: smaxv b[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: umov w0, v[[REDUCE]].b[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smaxv b0, p0, z0.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( %pg, %a) ret i8 %out @@ -98,9 +108,10 @@ define i16 @smaxv_i16( %pg, %a) { ; CHECK-LABEL: smaxv_i16: -; CHECK: smaxv h[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: umov w0, v[[REDUCE]].h[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smaxv h0, p0, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( %pg, %a) ret i16 %out @@ -108,9 +119,10 @@ define i32 @smaxv_i32( %pg, %a) { ; CHECK-LABEL: smaxv_i32: -; CHECK: smaxv s[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov w0, s[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smaxv s0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( %pg, %a) ret i32 %out @@ -118,9 +130,10 @@ define i64 @smaxv_i64( %pg, %a) { ; CHECK-LABEL: smaxv_i64: -; CHECK: smaxv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smaxv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( %pg, %a) ret i64 %out @@ -128,9 +141,10 @@ define i8 @umaxv_i8( %pg, %a) { ; CHECK-LABEL: umaxv_i8: -; CHECK: umaxv b[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: umov w0, v[[REDUCE]].b[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umaxv b0, p0, z0.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( %pg, %a) ret i8 %out @@ -138,9 +152,10 @@ define 
i16 @umaxv_i16( %pg, %a) { ; CHECK-LABEL: umaxv_i16: -; CHECK: umaxv h[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: umov w0, v[[REDUCE]].h[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umaxv h0, p0, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( %pg, %a) ret i16 %out @@ -148,9 +163,10 @@ define i32 @umaxv_i32( %pg, %a) { ; CHECK-LABEL: umaxv_i32: -; CHECK: umaxv s[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov w0, s[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umaxv s0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( %pg, %a) ret i32 %out @@ -158,9 +174,10 @@ define i64 @umaxv_i64( %pg, %a) { ; CHECK-LABEL: umaxv_i64: -; CHECK: umaxv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umaxv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( %pg, %a) ret i64 %out @@ -168,9 +185,10 @@ define i8 @sminv_i8( %pg, %a) { ; CHECK-LABEL: sminv_i8: -; CHECK: sminv b[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: umov w0, v[[REDUCE]].b[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sminv b0, p0, z0.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( %pg, %a) ret i8 %out @@ -178,9 +196,10 @@ define i16 @sminv_i16( %pg, %a) { ; CHECK-LABEL: sminv_i16: -; CHECK: sminv h[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: umov w0, v[[REDUCE]].h[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sminv h0, p0, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16( %pg, %a) ret i16 %out @@ -188,9 +207,10 @@ define i32 @sminv_i32( %pg, %a) { ; CHECK-LABEL: sminv_i32: -; CHECK: sminv s[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov w0, s[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sminv s0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32( %pg, %a) ret i32 %out @@ -198,9 +218,10 @@ define i64 @sminv_i64( %pg, %a) { ; CHECK-LABEL: sminv_i64: -; CHECK: sminv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sminv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64( %pg, %a) ret i64 %out @@ -208,9 +229,10 @@ define i8 @uminv_i8( %pg, %a) { ; CHECK-LABEL: uminv_i8: -; CHECK: uminv b[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: umov w0, v[[REDUCE]].b[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uminv b0, p0, z0.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( %pg, %a) ret i8 %out @@ -218,9 +240,10 @@ define i16 @uminv_i16( %pg, %a) { ; CHECK-LABEL: uminv_i16: -; CHECK: uminv h[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: umov w0, v[[REDUCE]].h[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uminv h0, p0, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16( %pg, %a) ret i16 %out @@ -228,9 +251,10 @@ define i32 @uminv_i32( %pg, %a) { ; CHECK-LABEL: uminv_i32: -; CHECK: uminv s[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov w0, s[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uminv s0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32( %pg, %a) ret i32 %out @@ -238,9 +262,10 @@ define i64 @uminv_i64( %pg, %a) { ; CHECK-LABEL: uminv_i64: -; CHECK: uminv 
d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uminv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, %a) ret i64 %out @@ -248,9 +273,10 @@ define i8 @orv_i8( %pg, %a) { ; CHECK-LABEL: orv_i8: -; CHECK: orv b[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: umov w0, v[[REDUCE]].b[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: orv b0, p0, z0.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( %pg, %a) ret i8 %out @@ -258,9 +284,10 @@ define i16 @orv_i16( %pg, %a) { ; CHECK-LABEL: orv_i16: -; CHECK: orv h[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: umov w0, v[[REDUCE]].h[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: orv h0, p0, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i16 @llvm.aarch64.sve.orv.nxv8i16( %pg, %a) ret i16 %out @@ -268,9 +295,10 @@ define i32 @orv_i32( %pg, %a) { ; CHECK-LABEL: orv_i32: -; CHECK: orv s[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov w0, s[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: orv s0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i32 @llvm.aarch64.sve.orv.nxv4i32( %pg, %a) ret i32 %out @@ -278,9 +306,10 @@ define i64 @orv_i64( %pg, %a) { ; CHECK-LABEL: orv_i64: -; CHECK: orv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: orv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.orv.nxv2i64( %pg, %a) ret i64 %out @@ -288,9 +317,10 @@ define i8 @eorv_i8( %pg, %a) { ; CHECK-LABEL: eorv_i8: -; CHECK: eorv b[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: umov w0, v[[REDUCE]].b[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: eorv b0, p0, z0.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( %pg, %a) ret i8 %out @@ -298,9 +328,10 @@ define i16 @eorv_i16( %pg, %a) { ; CHECK-LABEL: eorv_i16: -; CHECK: eorv h[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: umov w0, v[[REDUCE]].h[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: eorv h0, p0, z0.h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16( %pg, %a) ret i16 %out @@ -308,9 +339,10 @@ define i32 @eorv_i32( %pg, %a) { ; CHECK-LABEL: eorv_i32: -; CHECK: eorv s[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov w0, s[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: eorv s0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32( %pg, %a) ret i32 %out @@ -318,9 +350,10 @@ define i64 @eorv_i64( %pg, %a) { ; CHECK-LABEL: eorv_i64: -; CHECK: eorv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: eorv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64( %pg, %a) ret i64 %out @@ -328,9 +361,10 @@ define i8 @andv_i8( %pg, %a) { ; CHECK-LABEL: andv_i8: -; CHECK: andv b[[REDUCE:[0-9]+]], p0, z0.b -; CHECK: umov w0, v[[REDUCE]].b[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: andv b0, p0, z0.b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( %pg, %a) ret i8 %out @@ -338,9 +372,10 @@ define i16 @andv_i16( %pg, %a) { ; CHECK-LABEL: andv_i16: -; CHECK: andv h[[REDUCE:[0-9]+]], p0, z0.h -; CHECK: umov w0, v[[REDUCE]].h[0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: andv h0, p0, z0.h +; 
CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i16 @llvm.aarch64.sve.andv.nxv8i16( %pg, %a) ret i16 %out @@ -348,9 +383,10 @@ define i32 @andv_i32( %pg, %a) { ; CHECK-LABEL: andv_i32: -; CHECK: andv s[[REDUCE:[0-9]+]], p0, z0.s -; CHECK: fmov w0, s[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: andv s0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %out = call i32 @llvm.aarch64.sve.andv.nxv4i32( %pg, %a) ret i32 %out @@ -358,9 +394,10 @@ define i64 @andv_i64( %pg, %a) { ; CHECK-LABEL: andv_i64: -; CHECK: andv d[[REDUCE:[0-9]+]], p0, z0.d -; CHECK: fmov x0, d[[REDUCE]] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: andv d0, p0, z0.d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %out = call i64 @llvm.aarch64.sve.andv.nxv2i64( %pg, %a) ret i64 %out
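For context, a minimal end-to-end illustration of the path this patch completes, mirroring the saddv_i32 test above. It is not part of the patch and assumes the standard ACLE mapping of svaddv_s32 onto llvm.aarch64.sve.saddv.nxv4i32 (built with an SVE-enabled compiler, e.g. clang -O2 -march=armv8-a+sve):

```c
// Illustrative only: svaddv_s32 performs a widening signed reduction to
// 64 bits, so with this change it is expected to select to
//   saddv d0, p0, z0.s
//   fmov  x0, d0
// via AArch64ISD::SADDV_PRED plus an extract of element 0.
#include <arm_sve.h>
#include <stdint.h>

int64_t sum_s32(svbool_t pg, svint32_t v) {
  return svaddv_s32(pg, v);
}
```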