diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -113,6 +113,24 @@
   // float to single-width float, rounding towards odd). Takes a double-width
   // float vector and produces a single-width float vector.
   VFNCVT_ROD,
+  // These nodes match the semantics of the corresponding RVV vector reduction
+  // instructions. They produce a vector result which is the reduction
+  // performed over the first vector operand plus the first element of the
+  // second vector operand. The first operand is an unconstrained vector type,
+  // and the result and second operand's types are expected to be the
+  // corresponding full-width LMUL=1 type for the first operand:
+  //   nxv8i8 = vecreduce_add nxv32i8, nxv8i8
+  //   nxv2i32 = vecreduce_add nxv8i32, nxv2i32
+  // The difference in types does introduce extra vsetvli instructions, but it
+  // similarly reduces the number of registers consumed per reduction.
+  VECREDUCE_ADD,
+  VECREDUCE_UMAX,
+  VECREDUCE_SMAX,
+  VECREDUCE_UMIN,
+  VECREDUCE_SMIN,
+  VECREDUCE_AND,
+  VECREDUCE_OR,
+  VECREDUCE_XOR,
 };
 } // namespace RISCVISD
@@ -314,6 +332,7 @@
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -377,6 +377,15 @@
     setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
+
+    setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
+    setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
+    setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
+    setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
+    setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
+    setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
+    setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
+    setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
   }
 
   for (MVT VT : BoolVecVTs) {
@@ -418,6 +427,17 @@
     // Custom-lower insert/extract operations to simplify patterns.
     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+
+    // Custom-lower reduction operations to set up the corresponding custom
+    // nodes' operands.
+    setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+    setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+    setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+    setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+    setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+    setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+    setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+    setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
   }
 
   // Expand various CCs to best match the RVV ISA, which natively supports UNE
@@ -893,6 +913,15 @@
     return Op;
   }
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+    return lowerVECREDUCE(Op, DAG);
   }
 }
 
@@ -1615,6 +1644,60 @@
   }
 }
 
+static std::pair<unsigned, uint64_t>
+getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) {
+  switch (ISDOpcode) {
+  default:
+    llvm_unreachable("Unhandled reduction");
+  case ISD::VECREDUCE_ADD:
+    return {RISCVISD::VECREDUCE_ADD, 0};
+  case ISD::VECREDUCE_UMAX:
+    return {RISCVISD::VECREDUCE_UMAX, 0};
+  case ISD::VECREDUCE_SMAX:
+    return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)};
+  case ISD::VECREDUCE_UMIN:
+    return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)};
+  case ISD::VECREDUCE_SMIN:
+    return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)};
+  case ISD::VECREDUCE_AND:
+    return {RISCVISD::VECREDUCE_AND, -1};
+  case ISD::VECREDUCE_OR:
+    return {RISCVISD::VECREDUCE_OR, 0};
+  case ISD::VECREDUCE_XOR:
+    return {RISCVISD::VECREDUCE_XOR, 0};
+  }
+}
+
+// Take a (supported) standard ISD reduction opcode and transform it to a RISCV
+// reduction opcode. Note that this returns a vector type, which must be
+// further processed to access the scalar result in element 0.
+SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  assert(Op.getValueType().isSimple() &&
+         Op.getOperand(0).getValueType().isSimple() &&
+         "Unexpected vector-reduce lowering");
+  MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+  unsigned RVVOpcode;
+  uint64_t IdentityVal;
+  std::tie(RVVOpcode, IdentityVal) =
+      getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits());
+  // We have to perform a bit of a dance to get from our vector type to the
+  // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector
+  // element size in bits to find the type which fills a single register. Be
+  // careful to use the operand's vector element type rather than the
+  // reduction's value type, as that has likely been extended to XLEN.
+  unsigned NumElts = 64 / VecEltVT.getSizeInBits();
+  MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
+  SDValue IdentitySplat =
+      DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
+  SDValue Reduction =
+      DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
+  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
+                             DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
+}
+
 // Returns the opcode of the target-specific SDNode that implements the 32-bit
 // form of the given Opcode.
 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
@@ -1903,6 +1986,19 @@
     }
     break;
   }
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMIN:
+    // The custom-lowering for these nodes returns a vector whose first element
+    // is the result of the reduction. Extract its first element and let the
+    // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
+    Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG));
+    break;
   }
 }
 
@@ -4160,6 +4256,14 @@
   NODE_NAME_CASE(VSLIDEDOWN)
   NODE_NAME_CASE(VID)
   NODE_NAME_CASE(VFNCVT_ROD)
+  NODE_NAME_CASE(VECREDUCE_ADD)
+  NODE_NAME_CASE(VECREDUCE_UMAX)
+  NODE_NAME_CASE(VECREDUCE_SMAX)
+  NODE_NAME_CASE(VECREDUCE_UMIN)
+  NODE_NAME_CASE(VECREDUCE_SMIN)
+  NODE_NAME_CASE(VECREDUCE_AND)
+  NODE_NAME_CASE(VECREDUCE_OR)
+  NODE_NAME_CASE(VECREDUCE_XOR)
   }
   // clang-format on
   return nullptr;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -45,6 +45,13 @@
   dag Value = !con(Prefix, !if(swap, B, A), !if(swap, A, B), Suffix);
 }
 
+def SDTRVVVecReduce : SDTypeProfile<1, 2, [
+  SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>
+]>;
+
+foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR"] in
+  def rvv_vecreduce_#kind : SDNode<"RISCVISD::VECREDUCE_"#kind, SDTRVVVecReduce>;
+
 multiclass VPatUSLoadStoreSDNode
 
+multiclass VPatReductionSDNode<SDNode vop, string instruction_name> {
+  foreach vti = AllIntegerVectors in {
+    defvar vti_m1 = !cast<VTypeInfo>("VI" # vti.SEW # "M1");
+    def: Pat<(vti_m1.Vector (vop (vti.Vector vti.RegClass:$rs1), VR:$rs2)),
+             (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
+                 (vti_m1.Vector (IMPLICIT_DEF)),
+                 (vti.Vector vti.RegClass:$rs1),
+                 (vti_m1.Vector VR:$rs2),
+                 vti.AVL, vti.SEW)>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
@@ -475,6 +494,16 @@
                 vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.SEW)>;
 }
 
+// 15.1. Vector Single-Width Integer Reduction Instructions
+defm "" : VPatReductionSDNode<rvv_vecreduce_ADD,  "PseudoVREDSUM">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_UMAX, "PseudoVREDMAXU">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_SMAX, "PseudoVREDMAX">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_UMIN, "PseudoVREDMINU">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_SMIN, "PseudoVREDMIN">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_AND,  "PseudoVREDAND">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_OR,   "PseudoVREDOR">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_XOR,  "PseudoVREDXOR">;
+
 // 16.1. Vector Mask-Register Logical Instructions
 foreach mti = AllMasks in {
   def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)),
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -47,6 +47,8 @@
                             Instruction *Inst = nullptr);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                           Type *Ty, TTI::TargetCostKind CostKind);
+
+  bool shouldExpandReduction(const IntrinsicInst *II) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -94,3 +94,19 @@
   // Prevent hoisting in unknown cases.
return TTI::TCC_Free; } + +bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const { + // Currently, the ExpandReductions pass can't expand scalable-vector + // reductions, but we still request expansion as RVV doesn't support certain + // reductions and the SelectionDAG can't legalize them either. + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: + return true; + } +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll @@ -0,0 +1,1641 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s + +declare i8 @llvm.vector.reduce.add.nxv1i8() + +define signext i8 @vreduce_add_nxv1i8( %v) { +; CHECK-LABEL: vreduce_add_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.nxv1i8() + +define signext i8 @vreduce_umax_nxv1i8( %v) { +; CHECK-LABEL: vreduce_umax_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.nxv1i8() + +define signext i8 @vreduce_smax_nxv1i8( %v) { +; CHECK-LABEL: vreduce_smax_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.nxv1i8() + +define signext i8 @vreduce_umin_nxv1i8( %v) { +; CHECK-LABEL: vreduce_umin_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.nxv1i8() + +define signext i8 @vreduce_smin_nxv1i8( %v) { +; CHECK-LABEL: vreduce_smin_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.nxv1i8() + +define signext i8 
@vreduce_and_nxv1i8( %v) { +; CHECK-LABEL: vreduce_and_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.nxv1i8() + +define signext i8 @vreduce_or_nxv1i8( %v) { +; CHECK-LABEL: vreduce_or_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.nxv1i8() + +define signext i8 @vreduce_xor_nxv1i8( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.nxv2i8() + +define signext i8 @vreduce_add_nxv2i8( %v) { +; CHECK-LABEL: vreduce_add_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.nxv2i8() + +define signext i8 @vreduce_umax_nxv2i8( %v) { +; CHECK-LABEL: vreduce_umax_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.nxv2i8() + +define signext i8 @vreduce_smax_nxv2i8( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.nxv2i8() + +define signext i8 @vreduce_umin_nxv2i8( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.nxv2i8() + +define signext i8 @vreduce_smin_nxv2i8( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; 
CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.nxv2i8() + +define signext i8 @vreduce_and_nxv2i8( %v) { +; CHECK-LABEL: vreduce_and_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.nxv2i8() + +define signext i8 @vreduce_or_nxv2i8( %v) { +; CHECK-LABEL: vreduce_or_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.nxv2i8() + +define signext i8 @vreduce_xor_nxv2i8( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.nxv4i8() + +define signext i8 @vreduce_add_nxv4i8( %v) { +; CHECK-LABEL: vreduce_add_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.nxv4i8() + +define signext i8 @vreduce_umax_nxv4i8( %v) { +; CHECK-LABEL: vreduce_umax_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.nxv4i8() + +define signext i8 @vreduce_smax_nxv4i8( %v) { +; CHECK-LABEL: vreduce_smax_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.nxv4i8() + +define signext i8 @vreduce_umin_nxv4i8( %v) { +; CHECK-LABEL: vreduce_umin_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; 
CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.nxv4i8() + +define signext i8 @vreduce_smin_nxv4i8( %v) { +; CHECK-LABEL: vreduce_smin_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.nxv4i8() + +define signext i8 @vreduce_and_nxv4i8( %v) { +; CHECK-LABEL: vreduce_and_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.nxv4i8() + +define signext i8 @vreduce_or_nxv4i8( %v) { +; CHECK-LABEL: vreduce_or_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.nxv4i8() + +define signext i8 @vreduce_xor_nxv4i8( %v) { +; CHECK-LABEL: vreduce_xor_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.add.nxv1i16() + +define signext i16 @vreduce_add_nxv1i16( %v) { +; CHECK-LABEL: vreduce_add_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.nxv1i16() + +define signext i16 @vreduce_umax_nxv1i16( %v) { +; CHECK-LABEL: vreduce_umax_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.nxv1i16() + +define signext i16 @vreduce_smax_nxv1i16( %v) { +; CHECK-LABEL: vreduce_smax_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) + ret 
i16 %red +} + +declare i16 @llvm.vector.reduce.umin.nxv1i16() + +define signext i16 @vreduce_umin_nxv1i16( %v) { +; CHECK-LABEL: vreduce_umin_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.nxv1i16() + +define signext i16 @vreduce_smin_nxv1i16( %v) { +; CHECK-LABEL: vreduce_smin_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.nxv1i16() + +define signext i16 @vreduce_and_nxv1i16( %v) { +; CHECK-LABEL: vreduce_and_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.nxv1i16() + +define signext i16 @vreduce_or_nxv1i16( %v) { +; CHECK-LABEL: vreduce_or_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.nxv1i16() + +define signext i16 @vreduce_xor_nxv1i16( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.nxv2i16() + +define signext i16 @vreduce_add_nxv2i16( %v) { +; CHECK-LABEL: vreduce_add_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.nxv2i16() + +define signext i16 @vreduce_umax_nxv2i16( %v) { +; CHECK-LABEL: vreduce_umax_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.nxv2i16() + 
+define signext i16 @vreduce_smax_nxv2i16( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.nxv2i16() + +define signext i16 @vreduce_umin_nxv2i16( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.nxv2i16() + +define signext i16 @vreduce_smin_nxv2i16( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.nxv2i16() + +define signext i16 @vreduce_and_nxv2i16( %v) { +; CHECK-LABEL: vreduce_and_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.nxv2i16() + +define signext i16 @vreduce_or_nxv2i16( %v) { +; CHECK-LABEL: vreduce_or_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.nxv2i16() + +define signext i16 @vreduce_xor_nxv2i16( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.nxv4i16() + +define signext i16 @vreduce_add_nxv4i16( %v) { +; CHECK-LABEL: vreduce_add_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.nxv4i16() + +define signext i16 @vreduce_umax_nxv4i16( %v) { +; CHECK-LABEL: vreduce_umax_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, 
zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.nxv4i16() + +define signext i16 @vreduce_smax_nxv4i16( %v) { +; CHECK-LABEL: vreduce_smax_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.nxv4i16() + +define signext i16 @vreduce_umin_nxv4i16( %v) { +; CHECK-LABEL: vreduce_umin_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.nxv4i16() + +define signext i16 @vreduce_smin_nxv4i16( %v) { +; CHECK-LABEL: vreduce_smin_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.nxv4i16() + +define signext i16 @vreduce_and_nxv4i16( %v) { +; CHECK-LABEL: vreduce_and_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.nxv4i16() + +define signext i16 @vreduce_or_nxv4i16( %v) { +; CHECK-LABEL: vreduce_or_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.nxv4i16() + +define signext i16 @vreduce_xor_nxv4i16( %v) { +; CHECK-LABEL: vreduce_xor_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.add.nxv1i32() + +define i32 @vreduce_add_nxv1i32( %v) { +; CHECK-LABEL: vreduce_add_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.nxv1i32() + +define i32 @vreduce_umax_nxv1i32( %v) { +; CHECK-LABEL: vreduce_umax_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; 
CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.nxv1i32() + +define i32 @vreduce_smax_nxv1i32( %v) { +; CHECK-LABEL: vreduce_smax_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.nxv1i32() + +define i32 @vreduce_umin_nxv1i32( %v) { +; CHECK-LABEL: vreduce_umin_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.nxv1i32() + +define i32 @vreduce_smin_nxv1i32( %v) { +; CHECK-LABEL: vreduce_smin_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.nxv1i32() + +define i32 @vreduce_and_nxv1i32( %v) { +; CHECK-LABEL: vreduce_and_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.nxv1i32() + +define i32 @vreduce_or_nxv1i32( %v) { +; CHECK-LABEL: vreduce_or_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.nxv1i32() + +define i32 @vreduce_xor_nxv1i32( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.add.nxv2i32() + +define i32 @vreduce_add_nxv2i32( %v) { +; CHECK-LABEL: vreduce_add_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.add.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.nxv2i32() + +define i32 @vreduce_umax_nxv2i32( %v) { +; 
CHECK-LABEL: vreduce_umax_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.nxv2i32() + +define i32 @vreduce_smax_nxv2i32( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.nxv2i32() + +define i32 @vreduce_umin_nxv2i32( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.nxv2i32() + +define i32 @vreduce_smin_nxv2i32( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.nxv2i32() + +define i32 @vreduce_and_nxv2i32( %v) { +; CHECK-LABEL: vreduce_and_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.nxv2i32() + +define i32 @vreduce_or_nxv2i32( %v) { +; CHECK-LABEL: vreduce_or_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.nxv2i32() + +define i32 @vreduce_xor_nxv2i32( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.xor.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.add.nxv4i32() + +define i32 @vreduce_add_nxv4i32( %v) { +; CHECK-LABEL: vreduce_add_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.nxv4i32() + +define i32 @vreduce_umax_nxv4i32( %v) { +; CHECK-LABEL: vreduce_umax_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; 
CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.nxv4i32() + +define i32 @vreduce_smax_nxv4i32( %v) { +; CHECK-LABEL: vreduce_smax_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.nxv4i32() + +define i32 @vreduce_umin_nxv4i32( %v) { +; CHECK-LABEL: vreduce_umin_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.nxv4i32() + +define i32 @vreduce_smin_nxv4i32( %v) { +; CHECK-LABEL: vreduce_smin_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.nxv4i32() + +define i32 @vreduce_and_nxv4i32( %v) { +; CHECK-LABEL: vreduce_and_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.nxv4i32() + +define i32 @vreduce_or_nxv4i32( %v) { +; CHECK-LABEL: vreduce_or_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.nxv4i32() + +define i32 @vreduce_xor_nxv4i32( %v) { +; CHECK-LABEL: vreduce_xor_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.add.nxv1i64() + +define i64 @vreduce_add_nxv1i64( %v) { +; CHECK-LABEL: vreduce_add_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 
@llvm.vector.reduce.add.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.nxv1i64() + +define i64 @vreduce_umax_nxv1i64( %v) { +; CHECK-LABEL: vreduce_umax_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.nxv1i64() + +define i64 @vreduce_smax_nxv1i64( %v) { +; CHECK-LABEL: vreduce_smax_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vsrl.vx v26, v26, a1 +; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.nxv1i64() + +define i64 @vreduce_umin_nxv1i64( %v) { +; CHECK-LABEL: vreduce_umin_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.nxv1i64() + +define i64 @vreduce_smin_nxv1i64( %v) { +; CHECK-LABEL: vreduce_smin_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vor.vv v25, v25, v26 +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.nxv1i64() + +define i64 @vreduce_and_nxv1i64( %v) { +; CHECK-LABEL: vreduce_and_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.nxv1i64() + +define i64 @vreduce_or_nxv1i64( %v) { +; CHECK-LABEL: vreduce_or_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.xor.nxv1i64() + +define i64 @vreduce_xor_nxv1i64( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i64: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.add.nxv2i64() + +define i64 @vreduce_add_nxv2i64( %v) { +; CHECK-LABEL: vreduce_add_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.nxv2i64() + +define i64 @vreduce_umax_nxv2i64( %v) { +; CHECK-LABEL: vreduce_umax_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.nxv2i64() + +define i64 @vreduce_smax_nxv2i64( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: vsrl.vx v26, v26, a1 +; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.nxv2i64() + +define i64 @vreduce_umin_nxv2i64( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.nxv2i64() + +define i64 @vreduce_smin_nxv2i64( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsll.vx v26, v26, a1 +; CHECK-NEXT: 
vor.vv v25, v25, v26 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.nxv2i64() + +define i64 @vreduce_and_nxv2i64( %v) { +; CHECK-LABEL: vreduce_and_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.nxv2i64() + +define i64 @vreduce_or_nxv2i64( %v) { +; CHECK-LABEL: vreduce_or_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.xor.nxv2i64() + +define i64 @vreduce_xor_nxv2i64( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.add.nxv4i64() + +define i64 @vreduce_add_nxv4i64( %v) { +; CHECK-LABEL: vreduce_add_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.nxv4i64() + +define i64 @vreduce_umax_nxv4i64( %v) { +; CHECK-LABEL: vreduce_umax_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vmv.x.s a1, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) + ret i64 %red +} + +declare i64 
@llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsll.vx v25, v25, a1
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsll.vx v26, v26, a1
+; CHECK-NEXT: vsrl.vx v26, v26, a1
+; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vredmax.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: ret
+ %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, -1
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vredminu.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: ret
+ %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, -1
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsll.vx v25, v25, a1
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vmv.v.x v26, a0
+; CHECK-NEXT: vsll.vx v26, v26, a1
+; CHECK-NEXT: vor.vv v25, v25, v26
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vredmin.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: ret
+ %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, -1
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vredand.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: ret
+ %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vredor.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: ret
+ %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vredxor.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: ret
+ %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+ ret i64 %red
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
@@ -0,0 +1,1529 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vredsum.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+ ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vredmaxu.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+ ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, zero, -128
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vredmax.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+ ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, -1
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vredminu.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+ ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, zero, 127
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vredmin.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+ ret i8 %red
+}
+
+declare i8
@llvm.vector.reduce.and.nxv1i8() + +define signext i8 @vreduce_and_nxv1i8( %v) { +; CHECK-LABEL: vreduce_and_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.nxv1i8() + +define signext i8 @vreduce_or_nxv1i8( %v) { +; CHECK-LABEL: vreduce_or_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.nxv1i8() + +define signext i8 @vreduce_xor_nxv1i8( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.nxv2i8() + +define signext i8 @vreduce_add_nxv2i8( %v) { +; CHECK-LABEL: vreduce_add_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.nxv2i8() + +define signext i8 @vreduce_umax_nxv2i8( %v) { +; CHECK-LABEL: vreduce_umax_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.nxv2i8() + +define signext i8 @vreduce_smax_nxv2i8( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.nxv2i8() + +define signext i8 @vreduce_umin_nxv2i8( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.nxv2i8() + +define signext i8 @vreduce_smin_nxv2i8( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 
127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.nxv2i8() + +define signext i8 @vreduce_and_nxv2i8( %v) { +; CHECK-LABEL: vreduce_and_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.nxv2i8() + +define signext i8 @vreduce_or_nxv2i8( %v) { +; CHECK-LABEL: vreduce_or_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.nxv2i8() + +define signext i8 @vreduce_xor_nxv2i8( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.nxv4i8() + +define signext i8 @vreduce_add_nxv4i8( %v) { +; CHECK-LABEL: vreduce_add_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.nxv4i8() + +define signext i8 @vreduce_umax_nxv4i8( %v) { +; CHECK-LABEL: vreduce_umax_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.nxv4i8() + +define signext i8 @vreduce_smax_nxv4i8( %v) { +; CHECK-LABEL: vreduce_smax_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.nxv4i8() + +define signext i8 @vreduce_umin_nxv4i8( %v) { +; CHECK-LABEL: vreduce_umin_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; 
CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.nxv4i8() + +define signext i8 @vreduce_smin_nxv4i8( %v) { +; CHECK-LABEL: vreduce_smin_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.nxv4i8() + +define signext i8 @vreduce_and_nxv4i8( %v) { +; CHECK-LABEL: vreduce_and_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.nxv4i8() + +define signext i8 @vreduce_or_nxv4i8( %v) { +; CHECK-LABEL: vreduce_or_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.nxv4i8() + +define signext i8 @vreduce_xor_nxv4i8( %v) { +; CHECK-LABEL: vreduce_xor_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.add.nxv1i16() + +define signext i16 @vreduce_add_nxv1i16( %v) { +; CHECK-LABEL: vreduce_add_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.nxv1i16() + +define signext i16 @vreduce_umax_nxv1i16( %v) { +; CHECK-LABEL: vreduce_umax_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.nxv1i16() + +define signext i16 @vreduce_smax_nxv1i16( %v) { +; CHECK-LABEL: vreduce_smax_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call 
i16 @llvm.vector.reduce.smax.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.nxv1i16() + +define signext i16 @vreduce_umin_nxv1i16( %v) { +; CHECK-LABEL: vreduce_umin_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.nxv1i16() + +define signext i16 @vreduce_smin_nxv1i16( %v) { +; CHECK-LABEL: vreduce_smin_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.nxv1i16() + +define signext i16 @vreduce_and_nxv1i16( %v) { +; CHECK-LABEL: vreduce_and_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.nxv1i16() + +define signext i16 @vreduce_or_nxv1i16( %v) { +; CHECK-LABEL: vreduce_or_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.nxv1i16() + +define signext i16 @vreduce_xor_nxv1i16( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.nxv2i16() + +define signext i16 @vreduce_add_nxv2i16( %v) { +; CHECK-LABEL: vreduce_add_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.nxv2i16() + +define signext i16 @vreduce_umax_nxv2i16( %v) { +; CHECK-LABEL: vreduce_umax_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) + ret i16 %red +} + 
+declare i16 @llvm.vector.reduce.smax.nxv2i16() + +define signext i16 @vreduce_smax_nxv2i16( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.nxv2i16() + +define signext i16 @vreduce_umin_nxv2i16( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.nxv2i16() + +define signext i16 @vreduce_smin_nxv2i16( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.nxv2i16() + +define signext i16 @vreduce_and_nxv2i16( %v) { +; CHECK-LABEL: vreduce_and_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.nxv2i16() + +define signext i16 @vreduce_or_nxv2i16( %v) { +; CHECK-LABEL: vreduce_or_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.nxv2i16() + +define signext i16 @vreduce_xor_nxv2i16( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.nxv4i16() + +define signext i16 @vreduce_add_nxv4i16( %v) { +; CHECK-LABEL: vreduce_add_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.nxv4i16() + +define signext i16 @vreduce_umax_nxv4i16( %v) { +; CHECK-LABEL: 
vreduce_umax_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.nxv4i16() + +define signext i16 @vreduce_smax_nxv4i16( %v) { +; CHECK-LABEL: vreduce_smax_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.nxv4i16() + +define signext i16 @vreduce_umin_nxv4i16( %v) { +; CHECK-LABEL: vreduce_umin_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.nxv4i16() + +define signext i16 @vreduce_smin_nxv4i16( %v) { +; CHECK-LABEL: vreduce_smin_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.nxv4i16() + +define signext i16 @vreduce_and_nxv4i16( %v) { +; CHECK-LABEL: vreduce_and_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.nxv4i16() + +define signext i16 @vreduce_or_nxv4i16( %v) { +; CHECK-LABEL: vreduce_or_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.nxv4i16() + +define signext i16 @vreduce_xor_nxv4i16( %v) { +; CHECK-LABEL: vreduce_xor_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.add.nxv1i32() + +define signext i32 @vreduce_add_nxv1i32( %v) { +; CHECK-LABEL: vreduce_add_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.nxv1i32() + +define signext i32 @vreduce_umax_nxv1i32( %v) { +; CHECK-LABEL: vreduce_umax_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; 
CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.nxv1i32() + +define signext i32 @vreduce_smax_nxv1i32( %v) { +; CHECK-LABEL: vreduce_smax_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.nxv1i32() + +define signext i32 @vreduce_umin_nxv1i32( %v) { +; CHECK-LABEL: vreduce_umin_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.nxv1i32() + +define signext i32 @vreduce_smin_nxv1i32( %v) { +; CHECK-LABEL: vreduce_smin_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.nxv1i32() + +define signext i32 @vreduce_and_nxv1i32( %v) { +; CHECK-LABEL: vreduce_and_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.nxv1i32() + +define signext i32 @vreduce_or_nxv1i32( %v) { +; CHECK-LABEL: vreduce_or_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.nxv1i32() + +define signext i32 @vreduce_xor_nxv1i32( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.add.nxv2i32() + +define signext i32 @vreduce_add_nxv2i32( %v) { +; CHECK-LABEL: vreduce_add_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 
@llvm.vector.reduce.add.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.nxv2i32() + +define signext i32 @vreduce_umax_nxv2i32( %v) { +; CHECK-LABEL: vreduce_umax_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.nxv2i32() + +define signext i32 @vreduce_smax_nxv2i32( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.nxv2i32() + +define signext i32 @vreduce_umin_nxv2i32( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.nxv2i32() + +define signext i32 @vreduce_smin_nxv2i32( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.nxv2i32() + +define signext i32 @vreduce_and_nxv2i32( %v) { +; CHECK-LABEL: vreduce_and_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.nxv2i32() + +define signext i32 @vreduce_or_nxv2i32( %v) { +; CHECK-LABEL: vreduce_or_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.nxv2i32() + +define signext i32 @vreduce_xor_nxv2i32( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.xor.nxv2i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.add.nxv4i32() + +define signext i32 @vreduce_add_nxv4i32( %v) { +; CHECK-LABEL: vreduce_add_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.nxv4i32() + +define signext i32 @vreduce_umax_nxv4i32( %v) { +; CHECK-LABEL: vreduce_umax_nxv4i32: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.nxv4i32() + +define signext i32 @vreduce_smax_nxv4i32( %v) { +; CHECK-LABEL: vreduce_smax_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.nxv4i32() + +define signext i32 @vreduce_umin_nxv4i32( %v) { +; CHECK-LABEL: vreduce_umin_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.nxv4i32() + +define signext i32 @vreduce_smin_nxv4i32( %v) { +; CHECK-LABEL: vreduce_smin_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.nxv4i32() + +define signext i32 @vreduce_and_nxv4i32( %v) { +; CHECK-LABEL: vreduce_and_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.nxv4i32() + +define signext i32 @vreduce_or_nxv4i32( %v) { +; CHECK-LABEL: vreduce_or_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.nxv4i32() + +define signext i32 @vreduce_xor_nxv4i32( %v) { +; CHECK-LABEL: vreduce_xor_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.add.nxv1i64() + +define i64 @vreduce_add_nxv1i64( %v) { +; CHECK-LABEL: vreduce_add_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, 
e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.nxv1i64() + +define i64 @vreduce_umax_nxv1i64( %v) { +; CHECK-LABEL: vreduce_umax_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.nxv1i64() + +define i64 @vreduce_smax_nxv1i64( %v) { +; CHECK-LABEL: vreduce_smax_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.nxv1i64() + +define i64 @vreduce_umin_nxv1i64( %v) { +; CHECK-LABEL: vreduce_umin_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.nxv1i64() + +define i64 @vreduce_smin_nxv1i64( %v) { +; CHECK-LABEL: vreduce_smin_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.nxv1i64() + +define i64 @vreduce_and_nxv1i64( %v) { +; CHECK-LABEL: vreduce_and_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.nxv1i64() + +define i64 @vreduce_or_nxv1i64( %v) { +; CHECK-LABEL: vreduce_or_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.xor.nxv1i64() + +define i64 @vreduce_xor_nxv1i64( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.add.nxv2i64() + +define i64 @vreduce_add_nxv2i64( %v) { +; CHECK-LABEL: vreduce_add_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) + ret i64 %red +} 
+ +declare i64 @llvm.vector.reduce.umax.nxv2i64() + +define i64 @vreduce_umax_nxv2i64( %v) { +; CHECK-LABEL: vreduce_umax_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.nxv2i64() + +define i64 @vreduce_smax_nxv2i64( %v) { +; CHECK-LABEL: vreduce_smax_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.nxv2i64() + +define i64 @vreduce_umin_nxv2i64( %v) { +; CHECK-LABEL: vreduce_umin_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.nxv2i64() + +define i64 @vreduce_smin_nxv2i64( %v) { +; CHECK-LABEL: vreduce_smin_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.nxv2i64() + +define i64 @vreduce_and_nxv2i64( %v) { +; CHECK-LABEL: vreduce_and_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.nxv2i64() + +define i64 @vreduce_or_nxv2i64( %v) { +; CHECK-LABEL: vreduce_or_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.xor.nxv2i64() + +define i64 @vreduce_xor_nxv2i64( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) + ret i64 %red +} + +declare i64 
@llvm.vector.reduce.add.nxv4i64() + +define i64 @vreduce_add_nxv4i64( %v) { +; CHECK-LABEL: vreduce_add_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.nxv4i64() + +define i64 @vreduce_umax_nxv4i64( %v) { +; CHECK-LABEL: vreduce_umax_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.nxv4i64() + +define i64 @vreduce_smax_nxv4i64( %v) { +; CHECK-LABEL: vreduce_smax_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.nxv4i64() + +define i64 @vreduce_umin_nxv4i64( %v) { +; CHECK-LABEL: vreduce_umin_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.nxv4i64() + +define i64 @vreduce_smin_nxv4i64( %v) { +; CHECK-LABEL: vreduce_smin_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.nxv4i64() + +define i64 @vreduce_and_nxv4i64( %v) { +; CHECK-LABEL: vreduce_and_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.nxv4i64() + +define i64 @vreduce_or_nxv4i64( %v) { +; CHECK-LABEL: vreduce_or_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) + ret i64 %red +} + +declare i64 
@llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vredxor.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+ ret i64 %red
+}
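
For reference, a minimal standalone input in the same style as these tests (the function name @sum_nxv8i32 and the nxv8i32 operand type are illustrative choices, not taken from the patch): compiled with llc -mtriple=riscv64 -mattr=+experimental-v, IR like the sketch below should now select a single vredsum.vs fed by a zero-splat LMUL=1 identity operand, matching the autogenerated checks above.

; Hypothetical sketch, not part of the patch.
declare i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32>)

define signext i32 @sum_nxv8i32(<vscale x 8 x i32> %v) {
  %red = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> %v)
  ret i32 %red
}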