Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -894,6 +894,17 @@
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
 
+class AdvSIMD_SVE_Int_Reduce_Intrinsic
+  : Intrinsic<[LLVMVectorElementType<0>],
+              [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+               llvm_anyvector_ty],
+              [IntrNoMem]>;
+
+class AdvSIMD_SVE_SADDV_Reduce_Intrinsic
+  : Intrinsic<[llvm_i64_ty],
+              [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+               llvm_anyvector_ty],
+              [IntrNoMem]>;
 
 class AdvSIMD_Pred2VectorArg_Intrinsic
   : Intrinsic<[llvm_anyvector_ty],
@@ -941,6 +952,18 @@
 def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic;
 
+def int_aarch64_sve_saddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
+def int_aarch64_sve_uaddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
+
+def int_aarch64_sve_smaxv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_umaxv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_sminv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_uminv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+
+def int_aarch64_sve_orv  : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_eorv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_andv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+
 def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -155,6 +155,14 @@
   SMAXV,
   UMAXV,
 
+  SMAXV_PRED,
+  UMAXV_PRED,
+  SMINV_PRED,
+  UMINV_PRED,
+  ORV_PRED,
+  EORV_PRED,
+  ANDV_PRED,
+
   // Vector bitwise negation
   NOT,
 
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -163,6 +163,9 @@
   }
 
   if (Subtarget->hasSVE()) {
+    addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
+    addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
+
     // Add legal sve predicate types
     addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
     addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
@@ -1279,6 +1282,13 @@
   case AArch64ISD::UMINV:             return "AArch64ISD::UMINV";
   case AArch64ISD::SMAXV:             return "AArch64ISD::SMAXV";
   case AArch64ISD::UMAXV:             return "AArch64ISD::UMAXV";
+  case AArch64ISD::SMAXV_PRED:        return "AArch64ISD::SMAXV_PRED";
+  case AArch64ISD::UMAXV_PRED:        return "AArch64ISD::UMAXV_PRED";
+  case AArch64ISD::SMINV_PRED:        return "AArch64ISD::SMINV_PRED";
+  case AArch64ISD::UMINV_PRED:        return "AArch64ISD::UMINV_PRED";
+  case AArch64ISD::ORV_PRED:          return "AArch64ISD::ORV_PRED";
+  case AArch64ISD::EORV_PRED:         return "AArch64ISD::EORV_PRED";
+  case AArch64ISD::ANDV_PRED:         return "AArch64ISD::ANDV_PRED";
   case AArch64ISD::NOT:               return "AArch64ISD::NOT";
   case AArch64ISD::BIT:               return "AArch64ISD::BIT";
   case AArch64ISD::CBZ:               return "AArch64ISD::CBZ";
@@ -10500,6 +10510,42 @@
                      DAG.getConstant(0, dl, MVT::i64));
 }
 
+static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc, SelectionDAG &DAG) {
+  SDLoc dl(N);
+  LLVMContext &Ctx = *DAG.getContext();
+  EVT VT = N->getValueType(0);
+  SDValue Pred = N->getOperand(1);
+  SDValue Data = N->getOperand(2);
+
+  if (VT == MVT::i8 || VT == MVT::i16) {
+    EVT ReduceVT = EVT::getVectorVT(Ctx, VT, 1);
+
+    EVT OutputVT = EVT::getVectorVT(Ctx, MVT::i8, 16);
+    auto subregIdx = AArch64::bsub;
+
+    if (VT == MVT::i16) {
+      OutputVT = EVT::getVectorVT(Ctx, MVT::i16, 8);
+      subregIdx = AArch64::hsub;
+    }
+
+    SDValue Reduce = DAG.getNode(Opc, dl, ReduceVT, Pred, Data);
+
+    SDValue Temp = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, OutputVT), 0);
+
+    SDValue InsertSubreg = DAG.getTargetInsertSubreg(subregIdx, dl, OutputVT, Temp, Reduce);
+
+    SDValue Zero = DAG.getConstant(0, dl, MVT::i64);
+
+    SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, InsertSubreg, Zero);
+
+    return Result;
+  } else if (VT == MVT::i32 || VT == MVT::i64) {
+    return DAG.getNode(Opc, dl, VT, Pred, Data);
+  }
+
+  return SDValue();
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -10554,6 +10600,20 @@
   case Intrinsic::aarch64_crc32h:
   case Intrinsic::aarch64_crc32ch:
     return tryCombineCRC32(0xffff, N, DAG);
+  case Intrinsic::aarch64_sve_smaxv:
+    return LowerSVEIntReduction(N, AArch64ISD::SMAXV_PRED, DAG);
+  case Intrinsic::aarch64_sve_umaxv:
+    return LowerSVEIntReduction(N, AArch64ISD::UMAXV_PRED, DAG);
+  case Intrinsic::aarch64_sve_sminv:
+    return LowerSVEIntReduction(N, AArch64ISD::SMINV_PRED, DAG);
+  case Intrinsic::aarch64_sve_uminv:
+    return LowerSVEIntReduction(N, AArch64ISD::UMINV_PRED, DAG);
+  case Intrinsic::aarch64_sve_orv:
+    return LowerSVEIntReduction(N, AArch64ISD::ORV_PRED, DAG);
+  case Intrinsic::aarch64_sve_eorv:
+    return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG);
+  case Intrinsic::aarch64_sve_andv:
+    return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG);
   }
   return SDValue();
 }
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -86,9 +86,9 @@
 
 // Helper fragment for an extract of the high portion of a 128-bit vector.
 def extract_high_v16i8 :
-   UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
+   UnOpFrag<(v8i8 (extract_subvector (v16i8 node:$LHS), (i64 8)))>;
 def extract_high_v8i16 :
-   UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
+   UnOpFrag<(v4i16 (extract_subvector (v8i16 node:$LHS), (i64 4)))>;
 def extract_high_v4i32 :
    UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
 def extract_high_v2i64 :
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2122,7 +2122,7 @@
                 [(set GPR32z:$Rt,
                       (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
 defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
-                   [(set FPR8Op:$Rt,
+                   [(set (untyped FPR8Op:$Rt),
                          (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
 defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                    [(set (f16 FPR16Op:$Rt),
@@ -2334,10 +2334,10 @@
                    [(set GPR32z:$Rt,
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
 defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
-                    [(set FPR8Op:$Rt,
+                    [(set (untyped FPR8Op:$Rt),
                           (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
 defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
-                    [(set FPR16Op:$Rt,
+                    [(set (f16 FPR16Op:$Rt),
                           (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
 defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                     [(set (f32 FPR32Op:$Rt),
@@ -2753,7 +2753,7 @@
                 [(store GPR32z:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
 defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
-                    [(store FPR8Op:$Rt,
+                    [(store (untyped FPR8Op:$Rt),
                             (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
 defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                     [(store (f16 FPR16Op:$Rt),
@@ -2875,7 +2875,7 @@
                          [(store GPR32z:$Rt,
                                  (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
 defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
-                         [(store FPR8Op:$Rt,
+                         [(store (untyped FPR8Op:$Rt),
                                  (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
 defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                          [(store (f16 FPR16Op:$Rt),
Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -419,10 +419,10 @@
   def Q31   : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
 }
 
-def FPR8  : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
+def FPR8  : RegisterClass<"AArch64", [untyped, v1i8], 8, (sequence "B%u", 0, 31)> {
   let Size = 8;
 }
-def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
+def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, (sequence "H%u", 0, 31)> {
   let Size = 16;
 }
 def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -10,6 +10,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
+
+def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
+def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
+def AArch64orv_pred   : SDNode<"AArch64ISD::ORV_PRED",   SDT_AArch64Reduce>;
+def AArch64eorv_pred  : SDNode<"AArch64ISD::EORV_PRED",  SDT_AArch64Reduce>;
+def AArch64andv_pred  : SDNode<"AArch64ISD::ANDV_PRED",  SDT_AArch64Reduce>;
+
 let Predicates = [HasSVE] in {
 
   def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">;
@@ -53,15 +63,17 @@
   defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>;
 
   // SVE predicated integer reductions.
-  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">;
-  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">;
-  defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">;
-  defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">;
-  defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">;
-  defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">;
-  defm ORV_VPZ   : sve_int_reduce_2<0b000, "orv">;
-  defm EORV_VPZ  : sve_int_reduce_2<0b001, "eorv">;
-  defm ANDV_VPZ  : sve_int_reduce_2<0b010, "andv">;
+  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
+  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv>;
+  defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_pred>;
+  defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_pred>;
+  defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_pred>;
+  defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_pred>;
+  defm ORV_VPZ   : sve_int_reduce_2<0b000, "orv",   AArch64orv_pred>;
+  defm EORV_VPZ  : sve_int_reduce_2<0b001, "eorv",  AArch64eorv_pred>;
+  defm ANDV_VPZ  : sve_int_reduce_2<0b010, "andv",  AArch64andv_pred>;
+
+
   defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">;
   defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5782,31 +5782,50 @@
   let Inst{4-0} = Vd;
 }
 
-multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm> {
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
   def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
   def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+
+  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<i64, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<i64, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
 }
 
-multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm> {
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
   def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
   def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
   def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<i64, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<i64, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<i64, op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_reduce_1<bits<3> opc, string asm> {
+multiclass sve_int_reduce_1<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
   def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
   def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
   def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<v1i8,  op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<v1i16, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<i32,   op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<i64,   op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_reduce_2<bits<3> opc, string asm> {
+multiclass sve_int_reduce_2<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>;
   def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>;
   def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
   def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<v1i8,  op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<v1i16, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<i32,   op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<i64,   op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
Index: llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll
@@ -0,0 +1,391 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define i64 @saddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: saddv_i8:
+; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                  <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @saddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: saddv_i16:
+; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i64 %out
+}
+
+define i64 @saddv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: saddv_i32:
+; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uaddv_i8:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                  <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uaddv_i16:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uaddv_i32:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uaddv_i64:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i8 @smaxv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: smaxv_i8:
+; CHECK: smaxv b[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: umov w0, v[[REDUCE]].b[0]
+; CHECK-NEXT: ret
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                 <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i16 @smaxv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: smaxv_i16:
+; CHECK: smaxv h[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: umov w0, v[[REDUCE]].h[0]
+; CHECK-NEXT: ret
+  %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define i32 @smaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: smaxv_i32:
+; CHECK: smaxv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @smaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: smaxv_i64:
+; CHECK: smaxv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i8 @umaxv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: umaxv_i8:
+; CHECK: umaxv b[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: umov w0, v[[REDUCE]].b[0]
+; CHECK-NEXT: ret
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                 <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i16 @umaxv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: umaxv_i16:
+; CHECK: umaxv h[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: umov w0, v[[REDUCE]].h[0]
+; CHECK-NEXT: ret
+  %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define i32 @umaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: umaxv_i32:
+; CHECK: umaxv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @umaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: umaxv_i64:
+; CHECK: umaxv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i8 @sminv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sminv_i8:
+; CHECK: sminv b[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: umov w0, v[[REDUCE]].b[0]
+; CHECK-NEXT: ret
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                 <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i16 @sminv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sminv_i16:
+; CHECK: sminv h[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: umov w0, v[[REDUCE]].h[0]
+; CHECK-NEXT: ret
+  %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define i32 @sminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sminv_i32:
+; CHECK: sminv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @sminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sminv_i64:
+; CHECK: sminv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i8 @uminv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uminv_i8:
+; CHECK: uminv b[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: umov w0, v[[REDUCE]].b[0]
+; CHECK-NEXT: ret
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                 <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i16 @uminv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uminv_i16:
+; CHECK: uminv h[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: umov w0, v[[REDUCE]].h[0]
+; CHECK-NEXT: ret
+  %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define i32 @uminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uminv_i32:
+; CHECK: uminv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @uminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uminv_i64:
+; CHECK: uminv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i8 @orv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: orv_i8:
+; CHECK: orv b[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: umov w0, v[[REDUCE]].b[0]
+; CHECK-NEXT: ret
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg,
+                                               <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i16 @orv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: orv_i16:
+; CHECK: orv h[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: umov w0, v[[REDUCE]].h[0]
+; CHECK-NEXT: ret
+  %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define i32 @orv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: orv_i32:
+; CHECK: orv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @orv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: orv_i64:
+; CHECK: orv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i8 @eorv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: eorv_i8:
+; CHECK: eorv b[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: umov w0, v[[REDUCE]].b[0]
+; CHECK-NEXT: ret
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i16 @eorv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: eorv_i16:
+; CHECK: eorv h[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: umov w0, v[[REDUCE]].h[0]
+; CHECK-NEXT: ret
+  %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                 <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define i32 @eorv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: eorv_i32:
+; CHECK: eorv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                 <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @eorv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: eorv_i64:
+; CHECK: eorv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                 <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i8 @andv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: andv_i8:
+; CHECK: andv b[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: umov w0, v[[REDUCE]].b[0]
+; CHECK-NEXT: ret
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i16 @andv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: andv_i16:
+; CHECK: andv h[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: umov w0, v[[REDUCE]].h[0]
+; CHECK-NEXT: ret
+  %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                 <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define i32 @andv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: andv_i32:
+; CHECK: andv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                 <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @andv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: andv_i64:
+; CHECK: andv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                 <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+declare i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+
+declare i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
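
For reference, these LLVM intrinsics are the IR-level counterparts of the ACLE reduction intrinsics declared in arm_sve.h. The snippet below is a minimal sketch, not part of the patch: C source that a front end supporting the ACLE would be expected to lower onto llvm.aarch64.sve.uaddv / llvm.aarch64.sve.smaxv. The svaddv_u8 / svmaxv_s32 names and types come from the ACLE; the clang builtin plumbing itself is outside the scope of this change.

// Sketch only: assumes an SVE-capable toolchain, e.g. -march=armv8-a+sve.
#include <arm_sve.h>
#include <stdint.h>

// Sum of the active i8 lanes. The ACLE widens the result to 64 bits, which
// matches the i64 return type of llvm.aarch64.sve.saddv/uaddv above.
uint64_t sum_active_u8(svbool_t pg, svuint8_t v) {
  return svaddv_u8(pg, v);
}

// Maximum of the active i32 lanes; expected to map onto
// llvm.aarch64.sve.smaxv.nxv4i32, i.e. the SMAXV_PRED node and finally
// "smaxv s0, p0, z0.s" as checked in the test above.
int32_t max_active_s32(svbool_t pg, svint32_t v) {
  return svmaxv_s32(pg, v);
}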