diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -138,16 +138,17 @@
   // nxv2i32 = vecreduce_add nxv8i32, nxv2i32
   // The difference in types does introduce extra vsetvli instructions but
   // similarly it reduces the number of registers consumed per reduction.
-  VECREDUCE_ADD,
-  VECREDUCE_UMAX,
-  VECREDUCE_SMAX,
-  VECREDUCE_UMIN,
-  VECREDUCE_SMIN,
-  VECREDUCE_AND,
-  VECREDUCE_OR,
-  VECREDUCE_XOR,
-  VECREDUCE_FADD,
-  VECREDUCE_SEQ_FADD,
+  // Also has a mask and VL operand.
+  VECREDUCE_ADD_VL,
+  VECREDUCE_UMAX_VL,
+  VECREDUCE_SMAX_VL,
+  VECREDUCE_UMIN_VL,
+  VECREDUCE_SMIN_VL,
+  VECREDUCE_AND_VL,
+  VECREDUCE_OR_VL,
+  VECREDUCE_XOR_VL,
+  VECREDUCE_FADD_VL,
+  VECREDUCE_SEQ_FADD_VL,

   // Vector binary and unary ops with a mask as a third operand, and VL as a
   // fourth operand.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -595,6 +595,17 @@
       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
       setOperationAction(ISD::BITCAST, VT, Custom);
+
+      // Custom-lower reduction operations to set up the corresponding custom
+      // nodes' operands.
+      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
     }

     for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -638,6 +649,9 @@
       setOperationAction(ISD::VSELECT, VT, Custom);
       setOperationAction(ISD::BITCAST, VT, Custom);
+
+      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
     }
   }
 }
@@ -2399,21 +2413,21 @@
   default:
     llvm_unreachable("Unhandled reduction");
   case ISD::VECREDUCE_ADD:
-    return RISCVISD::VECREDUCE_ADD;
+    return RISCVISD::VECREDUCE_ADD_VL;
   case ISD::VECREDUCE_UMAX:
-    return RISCVISD::VECREDUCE_UMAX;
+    return RISCVISD::VECREDUCE_UMAX_VL;
   case ISD::VECREDUCE_SMAX:
-    return RISCVISD::VECREDUCE_SMAX;
+    return RISCVISD::VECREDUCE_SMAX_VL;
   case ISD::VECREDUCE_UMIN:
-    return RISCVISD::VECREDUCE_UMIN;
+    return RISCVISD::VECREDUCE_UMIN_VL;
   case ISD::VECREDUCE_SMIN:
-    return RISCVISD::VECREDUCE_SMIN;
+    return RISCVISD::VECREDUCE_SMIN_VL;
   case ISD::VECREDUCE_AND:
-    return RISCVISD::VECREDUCE_AND;
+    return RISCVISD::VECREDUCE_AND_VL;
   case ISD::VECREDUCE_OR:
-    return RISCVISD::VECREDUCE_OR;
+    return RISCVISD::VECREDUCE_OR_VL;
   case ISD::VECREDUCE_XOR:
-    return RISCVISD::VECREDUCE_XOR;
+    return RISCVISD::VECREDUCE_XOR_VL;
   }
 }
@@ -2423,18 +2437,35 @@
 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
-  assert(Op.getValueType().isSimple() &&
-         Op.getOperand(0).getValueType().isSimple() &&
-         "Unexpected vector-reduce lowering");
   MVT VecVT = Op.getOperand(0).getSimpleValueType();
   MVT VecEltVT = VecVT.getVectorElementType();
+
+  // Avoid creating vectors with illegal type.
+  if (!isTypeLegal(VecVT))
+    return SDValue();
+
   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
-  MVT M1VT = getLMUL1VT(VecVT);
+
+  SDValue Vec = Op.getOperand(0);
+
+  MVT ContainerVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VecVT);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+
+  MVT M1VT = getLMUL1VT(ContainerVT);
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+
+  // FIXME: This is a VLMAX splat which might be too large and can prevent
+  // vsetvli removal.
   SDValue NeutralElem = DAG.getNeutralElement(
       ISD::getVecReduceBaseOpcode(Op.getOpcode()), DL, VecEltVT, SDNodeFlags());
   SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
   SDValue Reduction =
-      DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
+      DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
@@ -2450,10 +2481,10 @@
   default:
     llvm_unreachable("Unhandled reduction");
   case ISD::VECREDUCE_FADD:
-    return std::make_tuple(RISCVISD::VECREDUCE_FADD, Op.getOperand(0),
+    return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
                            DAG.getConstantFP(0.0, DL, EltVT));
   case ISD::VECREDUCE_SEQ_FADD:
-    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD, Op.getOperand(1),
+    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                            Op.getOperand(0));
   }
 }
@@ -2467,10 +2498,24 @@
   SDValue VectorVal, ScalarVal;
   std::tie(RVVOpcode, VectorVal, ScalarVal) =
       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
+  MVT VecVT = VectorVal.getSimpleValueType();
+
+  MVT ContainerVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VecVT);
+    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
+  }

   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+
+  // FIXME: This is a VLMAX splat which might be too large and can prevent
+  // vsetvli removal.
   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
-  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat);
+  SDValue Reduction =
+      DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
 }
@@ -3339,7 +3384,8 @@
   // The custom-lowering for these nodes returns a vector whose first element
   // is the result of the reduction. Extract its first element and let the
   // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
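As a minimal sketch of the path a fixed-length reduction now takes (not taken from the patch; the function name below is illustrative and the intermediate DAG in the comments is approximate), assuming VLEN >= 128 as in the RUN lines of the new tests:

declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

; Compile with, e.g.:
;   llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128
define i32 @reduce_add_v4i32_sketch(<4 x i32>* %x) {
  %v = load <4 x i32>, <4 x i32>* %x
  ; lowerVECREDUCE turns the call below roughly into:
  ;   t1 = insert_subvector undef, %v, 0      ; v4i32 placed in its nxv2i32 container
  ;   t2 = splat_vector 0                     ; neutral element in the LMUL=1 type
  ;   t3 = VECREDUCE_ADD_VL t1, t2, (all-ones mask), (VL = 4)
  ;   t4 = extract_vector_elt t3, 0
  ; and the VPatReductionVL pattern selects t3 to vredsum.vs, so the output is
  ; a vmv.v.i of the neutral element, vredsum.vs, then vmv.x.s (compare the
  ; vreduce_add_v4i32 test below).
  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
  ret i32 %red
}

The extra vsetvli toggles visible around the vmv.v.i in the generated tests come from splatting the neutral element at VLMAX, which is what the FIXME comments above refer to.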
- Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG)); + if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) + Results.push_back(V); break; } } @@ -5810,16 +5856,16 @@ NODE_NAME_CASE(VSLIDEDOWN_VL) NODE_NAME_CASE(VID_VL) NODE_NAME_CASE(VFNCVT_ROD_VL) - NODE_NAME_CASE(VECREDUCE_ADD) - NODE_NAME_CASE(VECREDUCE_UMAX) - NODE_NAME_CASE(VECREDUCE_SMAX) - NODE_NAME_CASE(VECREDUCE_UMIN) - NODE_NAME_CASE(VECREDUCE_SMIN) - NODE_NAME_CASE(VECREDUCE_AND) - NODE_NAME_CASE(VECREDUCE_OR) - NODE_NAME_CASE(VECREDUCE_XOR) - NODE_NAME_CASE(VECREDUCE_FADD) - NODE_NAME_CASE(VECREDUCE_SEQ_FADD) + NODE_NAME_CASE(VECREDUCE_ADD_VL) + NODE_NAME_CASE(VECREDUCE_UMAX_VL) + NODE_NAME_CASE(VECREDUCE_SMAX_VL) + NODE_NAME_CASE(VECREDUCE_UMIN_VL) + NODE_NAME_CASE(VECREDUCE_SMIN_VL) + NODE_NAME_CASE(VECREDUCE_AND_VL) + NODE_NAME_CASE(VECREDUCE_OR_VL) + NODE_NAME_CASE(VECREDUCE_XOR_VL) + NODE_NAME_CASE(VECREDUCE_FADD_VL) + NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) NODE_NAME_CASE(ADD_VL) NODE_NAME_CASE(AND_VL) NODE_NAME_CASE(MUL_VL) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -37,14 +37,6 @@ dag Value = !con(Prefix, !if(swap, B, A), !if(swap, A, B), Suffix); } -def SDTRVVVecReduce : SDTypeProfile<1, 2, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2> -]>; - -foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR", - "FADD", "SEQ_FADD"] in - def rvv_vecreduce_#kind : SDNode<"RISCVISD::VECREDUCE_"#kind, SDTRVVVecReduce>; - multiclass VPatUSLoadStoreSDNode { - foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in { - defvar vti_m1 = !cast(!if(is_float, "VF", "VI") # vti.SEW # "M1"); - def: Pat<(vti_m1.Vector (vop (vti.Vector vti.RegClass:$rs1), VR:$rs2)), - (!cast(instruction_name#"_VS_"#vti.LMul.MX) - (vti_m1.Vector (IMPLICIT_DEF)), - (vti.Vector vti.RegClass:$rs1), - (vti_m1.Vector VR:$rs2), - vti.AVL, vti.SEW)>; - } -} - //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -487,20 +467,6 @@ vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.SEW)>; } -// 15.1. Vector Single-Width Integer Reduction Instructions -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; - -// 15.3. Vector Single-Width Floating-Point Reduction Instructions -defm "" : VPatReductionSDNode; -defm "" : VPatReductionSDNode; - // 16.1. 
Vector Mask-Register Logical Instructions foreach mti = AllMasks in { def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -185,6 +185,15 @@ SDTCVecEltisVT<2, i1>, SDTCisVT<3, XLenVT>]>>; +def SDTRVVVecReduce : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<1, 3>, SDTCisVT<4, XLenVT> +]>; + +foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR", + "FADD", "SEQ_FADD"] in + def rvv_vecreduce_#kind#_vl : SDNode<"RISCVISD::VECREDUCE_"#kind#"_VL", SDTRVVVecReduce>; + // Ignore the vl operand. def SplatFPOp : PatFrag<(ops node:$op), (riscv_vfmv_v_f_vl node:$op, srcvalue)>; @@ -479,6 +488,20 @@ } } +multiclass VPatReductionVL { + foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in { + defvar vti_m1 = !cast(!if(is_float, "VF", "VI") # vti.SEW # "M1"); + def: Pat<(vti_m1.Vector (vop (vti.Vector vti.RegClass:$rs1), VR:$rs2, + (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast(instruction_name#"_VS_"#vti.LMul.MX) + (vti_m1.Vector (IMPLICIT_DEF)), + (vti.Vector vti.RegClass:$rs1), + (vti_m1.Vector VR:$rs2), + GPR:$vl, vti.SEW)>; + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -645,6 +668,24 @@ } // Predicates = [HasStdExtV] +// 15.1. Vector Single-Width Integer Reduction Instructions +let Predicates = [HasStdExtV] in { +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +} // Predicates = [HasStdExtV] + +// 15.3. Vector Single-Width Floating-Point Reduction Instructions +let Predicates = [HasStdExtV, HasStdExtF] in { +defm "" : VPatReductionVL; +defm "" : VPatReductionVL; +} // Predicates = [HasStdExtV, HasStdExtF] + let Predicates = [HasStdExtV, HasStdExtF] in { // 14.2. 
Vector Single-Width Floating-Point Add/Subtract Instructions diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -0,0 +1,786 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK + +declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) + +define half @vreduce_fadd_v1f16(<1 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: flh ft0, 0(a0) +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <1 x half>, <1 x half>* %x + %red = call reassoc half @llvm.vector.reduce.fadd.v1f16(half %s, <1 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v1f16(<1 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: flh ft0, 0(a0) +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <1 x half>, <1 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v1f16(half %s, <1 x half> %v) + ret half %red +} + +declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) + +define half @vreduce_fadd_v2f16(<2 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <2 x half>, <2 x half>* %x + %red = call reassoc half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v2f16(<2 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <2 x half>, <2 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v) + ret half %red +} + +declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) + +define half @vreduce_fadd_v4f16(<4 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <4 x half>, <4 x half>* %x + %red = call reassoc half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v4f16(<4 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli 
a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <4 x half>, <4 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v) + ret half %red +} + +declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) + +define half @vreduce_fadd_v8f16(<8 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <8 x half>, <8 x half>* %x + %red = call reassoc half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v8f16(<8 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <8 x half>, <8 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v) + ret half %red +} + +declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) + +define half @vreduce_fadd_v16f16(<16 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <16 x half>, <16 x half>* %x + %red = call reassoc half @llvm.vector.reduce.fadd.v16f16(half %s, <16 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v16f16(<16 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <16 x half>, <16 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v16f16(half %s, <16 x half> %v) + ret half %red +} + +declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>) + +define half @vreduce_fadd_v32f16(<32 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <32 x half>, <32 x half>* %x + 
%red = call reassoc half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v32f16(<32 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <32 x half>, <32 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v) + ret half %red +} + +declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>) + +define half @vreduce_fadd_v64f16(<64 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v64f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <64 x half>, <64 x half>* %x + %red = call reassoc half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v64f16(<64 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v64f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <64 x half>, <64 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v) + ret half %red +} + +declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>) + +define half @vreduce_fadd_v128f16(<128 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_fadd_v128f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <128 x half>, <128 x half>* %x + %red = call reassoc half @llvm.vector.reduce.fadd.v128f16(half %s, <128 x half> %v) + ret half %red +} + +define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) { +; CHECK-LABEL: vreduce_ord_fadd_v128f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: addi a2, zero, 64 +; CHECK-NEXT: vsetvli a3, a2, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v16, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vsetvli a0, zero, 
e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, ft0 +; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <128 x half>, <128 x half>* %x + %red = call half @llvm.vector.reduce.fadd.v128f16(half %s, <128 x half> %v) + ret half %red +} + +declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>) + +define float @vreduce_fadd_v1f32(<1 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_fadd_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <1 x float>, <1 x float>* %x + %red = call reassoc float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %v) + ret float %red +} + +define float @vreduce_ord_fadd_v1f32(<1 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_ord_fadd_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <1 x float>, <1 x float>* %x + %red = call float @llvm.vector.reduce.fadd.v1f32(float %s, <1 x float> %v) + ret float %red +} + +declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) + +define float @vreduce_fadd_v2f32(<2 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_fadd_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <2 x float>, <2 x float>* %x + %red = call reassoc float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v) + ret float %red +} + +define float @vreduce_ord_fadd_v2f32(<2 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_ord_fadd_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <2 x float>, <2 x float>* %x + %red = call float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v) + ret float %red +} + +declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) + +define float @vreduce_fadd_v4f32(<4 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_fadd_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <4 x float>, <4 x float>* %x + %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) + ret float %red +} + +define float @vreduce_ord_fadd_v4f32(<4 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_ord_fadd_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, 
e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <4 x float>, <4 x float>* %x + %red = call float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) + ret float %red +} + +declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) + +define float @vreduce_fadd_v8f32(<8 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_fadd_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <8 x float>, <8 x float>* %x + %red = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %v) + ret float %red +} + +define float @vreduce_ord_fadd_v8f32(<8 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_ord_fadd_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <8 x float>, <8 x float>* %x + %red = call float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %v) + ret float %red +} + +declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) + +define float @vreduce_fadd_v16f32(<16 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_fadd_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <16 x float>, <16 x float>* %x + %red = call reassoc float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %v) + ret float %red +} + +define float @vreduce_ord_fadd_v16f32(<16 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_ord_fadd_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <16 x float>, <16 x float>* %x + %red = call float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %v) + ret float %red +} + +declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>) + +define float @vreduce_fadd_v32f32(<32 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_fadd_v32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 
+; CHECK-NEXT: ret + %v = load <32 x float>, <32 x float>* %x + %red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v) + ret float %red +} + +define float @vreduce_ord_fadd_v32f32(<32 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_ord_fadd_v32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <32 x float>, <32 x float>* %x + %red = call float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v) + ret float %red +} + +declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>) + +define float @vreduce_fadd_v64f32(<64 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_fadd_v64f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <64 x float>, <64 x float>* %x + %red = call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v) + ret float %red +} + +define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_ord_fadd_v64f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v16, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, ft0 +; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <64 x float>, <64 x float>* %x + %red = call float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v) + ret float %red +} + +declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>) + +define double @vreduce_fadd_v1f64(<1 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_fadd_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.d fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <1 x double>, <1 x double>* %x + %red = call reassoc double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %v) + ret double %red +} + +define double @vreduce_ord_fadd_v1f64(<1 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_ord_fadd_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: 
ret + %v = load <1 x double>, <1 x double>* %x + %red = call double @llvm.vector.reduce.fadd.v1f64(double %s, <1 x double> %v) + ret double %red +} + +declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) + +define double @vreduce_fadd_v2f64(<2 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_fadd_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.d fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <2 x double>, <2 x double>* %x + %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v) + ret double %red +} + +define double @vreduce_ord_fadd_v2f64(<2 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_ord_fadd_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v26, fa0 +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v25, v26 +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <2 x double>, <2 x double>* %x + %red = call double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v) + ret double %red +} + +declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) + +define double @vreduce_fadd_v4f64(<4 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_fadd_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.d fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <4 x double>, <4 x double>* %x + %red = call reassoc double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %v) + ret double %red +} + +define double @vreduce_ord_fadd_v4f64(<4 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_ord_fadd_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; CHECK-NEXT: vle64.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <4 x double>, <4 x double>* %x + %red = call double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %v) + ret double %red +} + +declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>) + +define double @vreduce_fadd_v8f64(<8 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_fadd_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.d fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <8 x double>, <8 x double>* %x + %red = call reassoc double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %v) + ret double %red +} + +define double @vreduce_ord_fadd_v8f64(<8 x double>* %x, double %s) { +; CHECK-LABEL: 
vreduce_ord_fadd_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; CHECK-NEXT: vle64.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <8 x double>, <8 x double>* %x + %red = call double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %v) + ret double %red +} + +declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) + +define double @vreduce_fadd_v16f64(<16 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_fadd_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.d fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <16 x double>, <16 x double>* %x + %red = call reassoc double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %v) + ret double %red +} + +define double @vreduce_ord_fadd_v16f64(<16 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_ord_fadd_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <16 x double>, <16 x double>* %x + %red = call double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %v) + ret double %red +} + +declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) + +define double @vreduce_fadd_v32f64(<32 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_fadd_v32f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; CHECK-NEXT: vfredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fadd.d fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <32 x double>, <32 x double>* %x + %red = call reassoc double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v) + ret double %red +} + +define double @vreduce_ord_fadd_v32f64(<32 x double>* %x, double %s) { +; CHECK-LABEL: vreduce_ord_fadd_v32f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v16, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, ft0 +; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; CHECK-NEXT: vfredosum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: ret + %v = load <32 x double>, <32 x double>* %x + %red = 
call double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v) + ret double %red +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -0,0 +1,25140 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>) + +define i8 @vreduce_add_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) + +define i8 @vreduce_add_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredsum.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) + +define i8 @vreduce_add_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredsum.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) + +define i8 @vreduce_add_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredsum.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) + +define i8 @vreduce_add_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredsum.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) + +define i8 @vreduce_add_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 
32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>) + +define i8 @vreduce_add_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) + +define i8 @vreduce_add_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.add.v256i8(<256 x i8>) + +define i8 @vreduce_add_v256i8(<256 x i8>* %x) { +; CHECK-LABEL: vreduce_add_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>) + +define i16 @vreduce_add_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_add_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) + +define i16 @vreduce_add_v2i16(<2 x i16>* %x) { +; CHECK-LABEL: vreduce_add_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vredsum.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) + +define i16 @vreduce_add_v4i16(<4 x i16>* %x) { +; 
CHECK-LABEL: vreduce_add_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vredsum.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) + +define i16 @vreduce_add_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: vreduce_add_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vredsum.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) + +define i16 @vreduce_add_v16i16(<16 x i16>* %x) { +; CHECK-LABEL: vreduce_add_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vredsum.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>) + +define i16 @vreduce_add_v32i16(<32 x i16>* %x) { +; CHECK-LABEL: vreduce_add_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vredsum.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>) + +define i16 @vreduce_add_v64i16(<64 x i16>* %x) { +; CHECK-LABEL: vreduce_add_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>) + +define i16 @vreduce_add_v128i16(<128 x i16>* %x) { +; CHECK-LABEL: vreduce_add_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredsum.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: 
vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <128 x i16>, <128 x i16>* %x
+ %red = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> %v)
+ ret i16 %red
+}
+
+declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>)
+
+define i32 @vreduce_add_v1i32(<1 x i32>* %x) {
+; CHECK-LABEL: vreduce_add_v1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <1 x i32>, <1 x i32>* %x
+ %red = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %v)
+ ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+
+define i32 @vreduce_add_v2i32(<2 x i32>* %x) {
+; CHECK-LABEL: vreduce_add_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vredsum.vs v25, v25, v26
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <2 x i32>, <2 x i32>* %x
+ %red = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %v)
+ ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+
+define i32 @vreduce_add_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: vreduce_add_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vredsum.vs v25, v25, v26
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <4 x i32>, <4 x i32>* %x
+ %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
+ ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+
+define i32 @vreduce_add_v8i32(<8 x i32>* %x) {
+; CHECK-LABEL: vreduce_add_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; CHECK-NEXT: vle32.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; CHECK-NEXT: vredsum.vs v25, v26, v25
+; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <8 x i32>, <8 x i32>* %x
+ %red = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v)
+ ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+
+define i32 @vreduce_add_v16i32(<16 x i32>* %x) {
+; CHECK-LABEL: vreduce_add_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; CHECK-NEXT: vle32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; CHECK-NEXT: vredsum.vs v25, v28, v25
+; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <16 x i32>, <16 x i32>* %x
+ %red = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v)
+ ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
+
+define i32 @vreduce_add_v32i32(<32 x i32>* %x) {
+; CHECK-LABEL: vreduce_add_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu
+; CHECK-NEXT: vredsum.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <32 x i32>, <32 x i32>* %x
+ %red = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %v)
+ ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
+
+define i32 @vreduce_add_v64i32(<64 x i32>* %x) {
+; CHECK-LABEL: vreduce_add_v64i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 32
+; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu
+; CHECK-NEXT: vredsum.vs v25, v8, v25
+; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: ret
+ %v = load <64 x i32>, <64 x i32>* %x
+ %red = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %v)
+ ret i32 %red
+}
+
+declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
+
+define i64 @vreduce_add_v1i64(<1 x i64>* %x) {
+; RV32-LABEL: vreduce_add_v1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_add_v1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <1 x i64>, <1 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+
+define i64 @vreduce_add_v2i64(<2 x i64>* %x) {
+; RV32-LABEL: vreduce_add_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: vredsum.vs v25, v25, v26
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_add_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v26, 0
+; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV64-NEXT: vredsum.vs v25, v25, v26
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <2 x i64>, <2 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+
+define i64 @vreduce_add_v4i64(<4 x i64>* %x) {
+; RV32-LABEL: vreduce_add_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; RV32-NEXT: vle64.v v26, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v25, 0
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vredsum.vs v25, v26, v25
+; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_add_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; RV64-NEXT: vle64.v v26, (a0)
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v25, 0
+; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV64-NEXT: vredsum.vs v25, v26, v25
+; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <4 x i64>, <4 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+
+define i64 @vreduce_add_v8i64(<8 x i64>* %x) {
+; RV32-LABEL: vreduce_add_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
+; RV32-NEXT: vle64.v v28, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v25, 0
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vredsum.vs v25, v28, v25
+; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_add_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu
+; RV64-NEXT: vle64.v v28, (a0)
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v25, 0
+; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV64-NEXT: vredsum.vs v25, v28, v25
+; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <8 x i64>, <8 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+
+define i64 @vreduce_add_v16i64(<16 x i64>* %x) {
+; RV32-LABEL: vreduce_add_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v25, 0
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: vredsum.vs v25, v8, v25
+; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_add_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v25, 0
+; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV64-NEXT: vredsum.vs v25, v8, v25
+; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <16 x i64>, <16 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
+
+define i64 @vreduce_add_v32i64(<32 x i64>* %x) {
+; RV32-LABEL: vreduce_add_v32i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vle64.v v16, (a0)
+; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu
+; RV32-NEXT: vslidedown.vi v16, v8, 8
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vadd.vv v28, v8, v16
+; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu
+; RV32-NEXT: vslidedown.vi v8, v28, 4
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vadd.vv v26, v28, v8
+; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu
+; RV32-NEXT: vslidedown.vi v28, v26, 2
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v25, v26, v28
+; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu
+; RV32-NEXT: vslidedown.vi v26, v25, 1
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_add_v32i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a0, a0, 
128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredsum.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>) + +define i64 @vreduce_add_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_add_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: addi a1, a0, 384 +; RV32-NEXT: addi s1, zero, 32 +; RV32-NEXT: vsetvli a2, s1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vsetivli a1, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 296(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 292(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 288(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 284(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 280(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 276(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 272(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 268(a2) # 4-byte Folded Spill +; RV32-NEXT: 
vslidedown.vi v16, v8, 22 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 264(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 260(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 256(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 252(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 248(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 244(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 240(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 236(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 232(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 228(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 224(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 220(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 216(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 212(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; 
RV32-NEXT: sw a1, 208(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 204(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 200(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 196(a2) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 224 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 216 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 208 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 224 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 192(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 216 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 188(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 208 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 184(a2) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a1, v24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 180(a2) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 176(a2) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 172(a2) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a1, s1, e32,m8,ta,mu +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vsetivli a1, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 224 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 216 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 208 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 200 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 192 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 184 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 176 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 168 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 160 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 152 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 144 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 136 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 7 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 120 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 112 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 104 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 96 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: 
vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 88 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 80 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 72 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 224 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 168(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 216 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 164(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 208 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 160(a2) # 
4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 200 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 156(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 192 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 152(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 184 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 148(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 176 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 144(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 168 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 140(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 160 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 136(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 152 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 132(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 144 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 128(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 136 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 124(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 7 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: 
vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 120(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 120 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 116(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 112 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 112(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 104 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 108(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 96 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 104(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 88 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 100(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 80 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 96(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 72 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 92(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 88(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 
+; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 84(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 80(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 76(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 72(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 68(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 64(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 60(a2) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 56(a2) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a1, v24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 52(a2) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 48(a2) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 44(a2) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a1, s1, e32,m8,ta,mu +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vsetivli a1, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 224 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a1, vlenb +; 
RV32-NEXT: addi a2, zero, 216 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 208 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 200 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 192 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 184 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 176 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 168 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 160 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 152 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 144 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 136 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 7 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 120 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 112 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 104 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 96 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 88 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, 
a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 80 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 72 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 224 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 40(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 216 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 36(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 208 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 32(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 200 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; 
RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 28(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 192 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 24(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 184 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 20(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 176 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 16(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 168 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a1, 12(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 160 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s11, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 152 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s10, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 144 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s8, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 136 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s7, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 7 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s6, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 120 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 112 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s4, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 104 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s3, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 96 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; 
RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s9, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 88 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 80 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 72 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a6, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a5, v0 +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a4, v0 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: vsetvli s1, s1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli a0, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vslidedown.vi v24, v8, 1 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: vmv.x.s s1, v24 +; RV32-NEXT: add a0, s1, a0 +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: vmv.x.s ra, v16 +; RV32-NEXT: add ra, s1, ra +; RV32-NEXT: sltu s1, ra, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add s1, ra, s1 +; RV32-NEXT: sltu s0, s1, ra +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add s0, s1, s0 +; RV32-NEXT: sltu s1, s0, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 8 
+; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: sltu s0, s1, s0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add s0, s1, s0 +; RV32-NEXT: sltu s1, s0, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: sltu s0, s1, s0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add s0, s1, s0 +; RV32-NEXT: sltu s1, s0, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: sltu s0, s1, s0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add s0, s1, s0 +; RV32-NEXT: sltu s1, s0, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: sltu s0, s1, s0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add s0, s1, s0 +; RV32-NEXT: sltu s1, s0, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: sltu s0, s1, s0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add s0, s1, s0 +; RV32-NEXT: sltu s1, s0, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: sltu s0, s1, s0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: vslidedown.vi v8, v8, 30 +; RV32-NEXT: vmv.x.s s0, v8 +; RV32-NEXT: add s0, s1, s0 +; RV32-NEXT: sltu s1, s0, s1 +; RV32-NEXT: add a0, a0, s1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a1, s0, a2 +; RV32-NEXT: sltu a2, a1, s0 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: add a2, a1, a4 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a1, a2, a6 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, a7 +; RV32-NEXT: add a2, a1, t0 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t1 +; RV32-NEXT: add a1, a2, t2 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, 
t3 +; RV32-NEXT: add a2, a1, t4 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t5 +; RV32-NEXT: add a1, a2, t6 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, s2 +; RV32-NEXT: add a2, a1, s9 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, s3 +; RV32-NEXT: add a1, a2, s4 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, s5 +; RV32-NEXT: add a2, a1, s6 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a1, a2, s8 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, s10 +; RV32-NEXT: add a2, a1, s11 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 12(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 16(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 20(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 24(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 28(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 32(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 36(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 40(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 44(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 48(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 52(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 56(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; 
RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 60(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 64(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 68(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 72(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 76(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 80(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 84(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 88(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 92(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 96(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 100(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 104(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 108(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 112(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 116(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw 
a2, 120(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 124(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 128(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 132(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 136(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 140(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 144(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 148(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 152(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 156(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 160(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 164(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 168(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 172(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 176(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 180(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; 
RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 184(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 188(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 192(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 196(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 200(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 204(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 208(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 212(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 216(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 220(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 224(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 228(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 232(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 236(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 240(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; 
RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 244(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 248(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 252(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 256(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 260(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 264(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 268(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 272(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 276(a2) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 280(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a1, a2 +; RV32-NEXT: sltu a1, a2, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 284(a1) # 4-byte Folded Reload +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: lw a1, 288(a1) # 4-byte Folded Reload +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: sltu a2, a1, a2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 292(a2) # 4-byte Folded Reload +; RV32-NEXT: add a2, a0, a2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: lw a0, 296(a0) # 4-byte Folded Reload +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sltu a1, a0, a1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; 
RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_add_v64i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vadd.vv v16, v24, v16 +; RV64-NEXT: vadd.vv v8, v8, v0 +; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredsum.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i64>, <64 x i64>* %x + %red = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> %v) + ret i64 %red +} + +declare i8 @llvm.vector.reduce.and.v1i8(<1 x i8>) + +define i8 @vreduce_and_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>) + +define i8 @vreduce_and_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>) + +define i8 @vreduce_and_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>) + +define i8 @vreduce_and_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>) + +define i8 @vreduce_and_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, 
zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>) + +define i8 @vreduce_and_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredand.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>) + +define i8 @vreduce_and_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredand.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>) + +define i8 @vreduce_and_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.and.v256i8(<256 x i8>) + +define i8 @vreduce_and_v256i8(<256 x i8>* %x) { +; CHECK-LABEL: vreduce_and_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.and.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.and.v1i16(<1 x i16>) + +define i16 @vreduce_and_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_and_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>) + +define i16 @vreduce_and_v2i16(<2 x i16>* %x) { +; CHECK-LABEL: vreduce_and_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 
2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>) + +define i16 @vreduce_and_v4i16(<4 x i16>* %x) { +; CHECK-LABEL: vreduce_and_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>) + +define i16 @vreduce_and_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: vreduce_and_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>) + +define i16 @vreduce_and_v16i16(<16 x i16>* %x) { +; CHECK-LABEL: vreduce_and_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vredand.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>) + +define i16 @vreduce_and_v32i16(<32 x i16>* %x) { +; CHECK-LABEL: vreduce_and_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vredand.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>) + +define i16 @vreduce_and_v64i16(<64 x i16>* %x) { +; CHECK-LABEL: vreduce_and_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.and.v128i16(<128 x i16>) + +define i16 @vreduce_and_v128i16(<128 x i16>* %x) { 
+; CHECK-LABEL: vreduce_and_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i16>, <128 x i16>* %x + %red = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32>) + +define i32 @vreduce_and_v1i32(<1 x i32>* %x) { +; CHECK-LABEL: vreduce_and_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i32>, <1 x i32>* %x + %red = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>) + +define i32 @vreduce_and_v2i32(<2 x i32>* %x) { +; CHECK-LABEL: vreduce_and_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i32>, <2 x i32>* %x + %red = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) + +define i32 @vreduce_and_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: vreduce_and_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vredand.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i32>, <4 x i32>* %x + %red = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>) + +define i32 @vreduce_and_v8i32(<8 x i32>* %x) { +; CHECK-LABEL: vreduce_and_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vredand.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i32>, <8 x i32>* %x + %red = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>) + +define i32 @vreduce_and_v16i32(<16 x i32>* %x) { +; CHECK-LABEL: vreduce_and_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vredand.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i32>, <16 x i32>* %x + %red = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>) + +define i32 @vreduce_and_v32i32(<32 x i32>* %x) { +; CHECK-LABEL: 
vreduce_and_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i32>, <32 x i32>* %x + %red = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.and.v64i32(<64 x i32>) + +define i32 @vreduce_and_v64i32(<64 x i32>* %x) { +; CHECK-LABEL: vreduce_and_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredand.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i32>, <64 x i32>* %x + %red = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>) + +define i64 @vreduce_and_v1i64(<1 x i64>* %x) { +; RV32-LABEL: vreduce_and_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_and_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <1 x i64>, <1 x i64>* %x + %red = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) + +define i64 @vreduce_and_v2i64(<2 x i64>* %x) { +; RV32-LABEL: vreduce_and_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v26, -1 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vredand.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_and_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v26, -1 +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV64-NEXT: vredand.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <2 x i64>, <2 x i64>* %x + %red = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>) + +define i64 @vreduce_and_v4i64(<4 x i64>* %x) { +; RV32-LABEL: vreduce_and_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v26, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vredand.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, 
v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_and_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v26, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vredand.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i64>, <4 x i64>* %x + %red = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>) + +define i64 @vreduce_and_v8i64(<8 x i64>* %x) { +; RV32-LABEL: vreduce_and_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vredand.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_and_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vredand.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i64>, <8 x i64>* %x + %red = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>) + +define i64 @vreduce_and_v16i64(<16 x i64>* %x) { +; RV32-LABEL: vreduce_and_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vredand.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_and_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredand.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i64>, <16 x i64>* %x + %red = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.v32i64(<32 x i64>) + +define i64 @vreduce_and_v32i64(<32 x i64>* %x) { +; RV32-LABEL: vreduce_and_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vand.vv v28, v8, v16 +; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v8, v28, 4 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vand.vv v26, v28, v8 +; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 2 +; 
RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vand.vv v25, v26, v28 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslidedown.vi v26, v25, 1 +; RV32-NEXT: vand.vv v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_and_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vand.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredand.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.and.v64i64(<64 x i64>) + +define i64 @vreduce_and_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_and_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -160 +; RV32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 120 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: addi a2, a0, 384 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 104(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 100(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 96(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 92(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 88(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s 
a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 84(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 80(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 76(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 72(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 68(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 64(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 60(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 56(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 52(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 48(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 44(a3) # 4-byte Folded Spill +; RV32-NEXT: addi a2, a0, 256 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v16, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 31 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 40(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 29 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 36(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 27 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 32(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 25 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; 
RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 28(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 23 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 24(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 21 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 20(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 19 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 16(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 17 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 12(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: vmv.x.s s11, v24 +; RV32-NEXT: vslidedown.vi v24, v16, 13 +; RV32-NEXT: vmv.x.s s10, v24 +; RV32-NEXT: vslidedown.vi v8, v16, 11 +; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v0, v16, 3 +; RV32-NEXT: vslidedown.vi v24, v16, 1 +; RV32-NEXT: vmv.x.s s9, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s8, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s7, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s6, v8 +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: vmv.x.s s4, v24 +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 128 +; RV32-NEXT: vle32.v v0, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v0, 31 +; RV32-NEXT: vslidedown.vi v24, v0, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # 
Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 3 +; RV32-NEXT: vs8r.v v24, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 1 +; RV32-NEXT: vmv.x.s s3, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s0, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 
+; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a6, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a5, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a4, v8 +; RV32-NEXT: vl8re8.v v8, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a3, v8 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v24, (a0) +; RV32-NEXT: vsetivli a0, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v24, 2 +; RV32-NEXT: vmv.x.s a0, v24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; 
RV32-NEXT: vslidedown.vi v8, v0, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi s1, zero, 112 +; RV32-NEXT: mul a1, a1, s1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 18 
+; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 3 +; RV32-NEXT: vslidedown.vi v16, v24, 1 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s ra, v16 +; RV32-NEXT: and a1, ra, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 5 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 7 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 9 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 11 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 13 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 15 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 17 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 19 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 21 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 23 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 25 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 27 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 29 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 31 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: and a1, a1, s1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: and a1, a1, a6 +; RV32-NEXT: and a1, a1, a7 +; RV32-NEXT: and a1, a1, t0 +; RV32-NEXT: and a1, a1, t1 +; RV32-NEXT: and a1, a1, t2 +; RV32-NEXT: and a1, a1, t3 +; RV32-NEXT: and a1, a1, t4 +; RV32-NEXT: and a1, a1, t5 +; RV32-NEXT: and a1, a1, t6 +; RV32-NEXT: and a1, a1, s0 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: and a1, a1, s4 +; RV32-NEXT: and a1, a1, s5 +; RV32-NEXT: and a1, a1, s6 +; RV32-NEXT: and a1, a1, s7 +; RV32-NEXT: and a1, a1, s8 +; RV32-NEXT: and a1, a1, s9 +; RV32-NEXT: and a1, a1, s10 +; RV32-NEXT: and a1, a1, s11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 12(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 16(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 20(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; 
RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 24(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 28(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 32(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 36(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 40(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 44(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 48(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 52(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 56(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 60(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 64(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 68(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 72(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 76(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 80(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 84(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 88(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 92(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr 
a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 96(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 100(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 104(a2) # 4-byte Folded Reload +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 160 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_and_v64i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vand.vv v16, v24, v16 +; RV64-NEXT: vand.vv v8, v8, v0 +; RV64-NEXT: vand.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredand.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i64>, <64 x i64>* %x + %red = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> %v) + ret i64 %red +} + +declare i8 @llvm.vector.reduce.or.v1i8(<1 x i8>) + +define i8 @vreduce_or_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>) + +define i8 @vreduce_or_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>) + +define i8 @vreduce_or_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: 
vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>) + +define i8 @vreduce_or_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>) + +define i8 @vreduce_or_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>) + +define i8 @vreduce_or_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredor.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>) + +define i8 @vreduce_or_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredor.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>) + +define i8 @vreduce_or_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.or.v256i8(<256 x i8>) + +define i8 @vreduce_or_v256i8(<256 x i8>* %x) { +; CHECK-LABEL: vreduce_or_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; 
CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.or.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.or.v1i16(<1 x i16>) + +define i16 @vreduce_or_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>) + +define i16 @vreduce_or_v2i16(<2 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>) + +define i16 @vreduce_or_v4i16(<4 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>) + +define i16 @vreduce_or_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>) + +define i16 @vreduce_or_v16i16(<16 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vredor.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>) + +define i16 @vreduce_or_v32i16(<32 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vredor.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = 
load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>) + +define i16 @vreduce_or_v64i16(<64 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.or.v128i16(<128 x i16>) + +define i16 @vreduce_or_v128i16(<128 x i16>* %x) { +; CHECK-LABEL: vreduce_or_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i16>, <128 x i16>* %x + %red = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.or.v1i32(<1 x i32>) + +define i32 @vreduce_or_v1i32(<1 x i32>* %x) { +; CHECK-LABEL: vreduce_or_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i32>, <1 x i32>* %x + %red = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) + +define i32 @vreduce_or_v2i32(<2 x i32>* %x) { +; CHECK-LABEL: vreduce_or_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i32>, <2 x i32>* %x + %red = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) + +define i32 @vreduce_or_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: vreduce_or_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vredor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i32>, <4 x i32>* %x + %red = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) + +define i32 @vreduce_or_v8i32(<8 x i32>* %x) { +; CHECK-LABEL: vreduce_or_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vredor.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i32>, <8 x 
i32>* %x + %red = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>) + +define i32 @vreduce_or_v16i32(<16 x i32>* %x) { +; CHECK-LABEL: vreduce_or_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vredor.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i32>, <16 x i32>* %x + %red = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>) + +define i32 @vreduce_or_v32i32(<32 x i32>* %x) { +; CHECK-LABEL: vreduce_or_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i32>, <32 x i32>* %x + %red = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.or.v64i32(<64 x i32>) + +define i32 @vreduce_or_v64i32(<64 x i32>* %x) { +; CHECK-LABEL: vreduce_or_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i32>, <64 x i32>* %x + %red = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>) + +define i64 @vreduce_or_v1i64(<1 x i64>* %x) { +; RV32-LABEL: vreduce_or_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_or_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <1 x i64>, <1 x i64>* %x + %red = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) + +define i64 @vreduce_or_v2i64(<2 x i64>* %x) { +; RV32-LABEL: vreduce_or_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v26, 0 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vredor.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_or_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v26, 0 +; 
RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV64-NEXT: vredor.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <2 x i64>, <2 x i64>* %x + %red = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>) + +define i64 @vreduce_or_v4i64(<4 x i64>* %x) { +; RV32-LABEL: vreduce_or_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v26, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vredor.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_or_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v26, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vredor.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i64>, <4 x i64>* %x + %red = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>) + +define i64 @vreduce_or_v8i64(<8 x i64>* %x) { +; RV32-LABEL: vreduce_or_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vredor.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_or_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vredor.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i64>, <8 x i64>* %x + %red = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>) + +define i64 @vreduce_or_v16i64(<16 x i64>* %x) { +; RV32-LABEL: vreduce_or_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vredor.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_or_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredor.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i64>, <16 x i64>* %x + %red = call i64 
@llvm.vector.reduce.or.v16i64(<16 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.v32i64(<32 x i64>) + +define i64 @vreduce_or_v32i64(<32 x i64>* %x) { +; RV32-LABEL: vreduce_or_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vor.vv v28, v8, v16 +; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v8, v28, 4 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vor.vv v26, v28, v8 +; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 2 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vor.vv v25, v26, v28 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslidedown.vi v26, v25, 1 +; RV32-NEXT: vor.vv v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_or_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredor.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.or.v64i64(<64 x i64>) + +define i64 @vreduce_or_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_or_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -160 +; RV32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 120 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: addi a2, a0, 384 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 104(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 100(a3) # 4-byte Folded 
Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 96(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 92(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 88(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 84(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 80(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 76(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 72(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 68(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 64(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 60(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 56(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 52(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 48(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 44(a3) # 4-byte Folded Spill +; RV32-NEXT: addi a2, a0, 256 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v16, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 31 +; 
RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 40(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 29 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 36(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 27 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 32(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 25 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 28(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 23 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 24(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 21 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 20(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 19 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 16(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 17 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 12(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: vmv.x.s s11, v24 +; RV32-NEXT: vslidedown.vi v24, v16, 13 +; RV32-NEXT: vmv.x.s s10, v24 +; RV32-NEXT: vslidedown.vi v8, v16, 11 +; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v0, v16, 3 +; RV32-NEXT: vslidedown.vi v24, v16, 1 +; RV32-NEXT: vmv.x.s s9, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s8, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s7, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s6, v8 +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: vmv.x.s s4, v24 +; RV32-NEXT: vsetvli a2, a1, 
e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 128 +; RV32-NEXT: vle32.v v0, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v0, 31 +; RV32-NEXT: vslidedown.vi v24, v0, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 3 +; RV32-NEXT: vs8r.v v24, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 1 +; RV32-NEXT: vmv.x.s s3, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s0, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add 
a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a6, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a5, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a4, v8 +; RV32-NEXT: vl8re8.v v8, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a3, v8 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v24, (a0) +; RV32-NEXT: vsetivli a0, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v24, 2 +; RV32-NEXT: vmv.x.s a0, v24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 26 +; RV32-NEXT: vmv.x.s 
a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi s1, zero, 112 +; RV32-NEXT: mul a1, a1, s1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: 
or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 3 +; RV32-NEXT: vslidedown.vi v16, v24, 1 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s ra, v16 +; RV32-NEXT: or a1, ra, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 5 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 7 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 9 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 11 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 13 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 15 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 17 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 19 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 21 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 23 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 25 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 27 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 29 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 31 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: or a1, a1, s1 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: or a1, a1, a4 +; RV32-NEXT: or a1, a1, a5 +; RV32-NEXT: or a1, a1, a6 +; RV32-NEXT: or a1, a1, a7 +; RV32-NEXT: or a1, a1, t0 +; RV32-NEXT: or a1, a1, t1 +; RV32-NEXT: or a1, a1, t2 +; RV32-NEXT: or a1, a1, t3 +; RV32-NEXT: or a1, a1, t4 +; RV32-NEXT: or a1, a1, t5 +; RV32-NEXT: or a1, a1, t6 +; RV32-NEXT: or a1, a1, s0 +; RV32-NEXT: or a1, a1, s2 +; RV32-NEXT: or a1, a1, s3 +; RV32-NEXT: or a1, a1, s4 +; RV32-NEXT: or a1, a1, s5 +; RV32-NEXT: or a1, a1, s6 +; RV32-NEXT: or a1, a1, s7 +; RV32-NEXT: or a1, a1, s8 +; RV32-NEXT: or a1, a1, s9 +; RV32-NEXT: or a1, a1, s10 +; RV32-NEXT: or 
a1, a1, s11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 12(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 16(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 20(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 24(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 28(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 32(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 36(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 40(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 44(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 48(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 52(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 56(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 60(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 64(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 68(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 72(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 76(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 80(a2) # 4-byte 
Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 84(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 88(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 92(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 96(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 100(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 104(a2) # 4-byte Folded Reload +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 160 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_or_v64i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vor.vv v16, v24, v16 +; RV64-NEXT: vor.vv v8, v8, v0 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredor.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i64>, <64 x i64>* %x + %red = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> %v) + ret i64 %red +} + +declare i8 @llvm.vector.reduce.xor.v1i8(<1 x i8>) + +define i8 @vreduce_xor_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>) + +define i8 @vreduce_xor_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: 
vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>) + +define i8 @vreduce_xor_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>) + +define i8 @vreduce_xor_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>) + +define i8 @vreduce_xor_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>) + +define i8 @vreduce_xor_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>) + +define i8 @vreduce_xor_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredxor.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>) + +define i8 @vreduce_xor_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli 
a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.xor.v256i8(<256 x i8>) + +define i8 @vreduce_xor_v256i8(<256 x i8>* %x) { +; CHECK-LABEL: vreduce_xor_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vxor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.xor.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.xor.v1i16(<1 x i16>) + +define i16 @vreduce_xor_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>) + +define i16 @vreduce_xor_v2i16(<2 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>) + +define i16 @vreduce_xor_v4i16(<4 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>) + +define i16 @vreduce_xor_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>) + +define i16 @vreduce_xor_v16i16(<16 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, 
e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>) + +define i16 @vreduce_xor_v32i16(<32 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vredxor.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>) + +define i16 @vreduce_xor_v64i16(<64 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.xor.v128i16(<128 x i16>) + +define i16 @vreduce_xor_v128i16(<128 x i16>* %x) { +; CHECK-LABEL: vreduce_xor_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vxor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i16>, <128 x i16>* %x + %red = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.xor.v1i32(<1 x i32>) + +define i32 @vreduce_xor_v1i32(<1 x i32>* %x) { +; CHECK-LABEL: vreduce_xor_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i32>, <1 x i32>* %x + %red = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>) + +define i32 @vreduce_xor_v2i32(<2 x i32>* %x) { +; CHECK-LABEL: vreduce_xor_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i32>, <2 x i32>* %x + %red = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) + +define i32 @vreduce_xor_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: vreduce_xor_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: 
vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vredxor.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i32>, <4 x i32>* %x + %red = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) + +define i32 @vreduce_xor_v8i32(<8 x i32>* %x) { +; CHECK-LABEL: vreduce_xor_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vredxor.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i32>, <8 x i32>* %x + %red = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>) + +define i32 @vreduce_xor_v16i32(<16 x i32>* %x) { +; CHECK-LABEL: vreduce_xor_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vredxor.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i32>, <16 x i32>* %x + %red = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>) + +define i32 @vreduce_xor_v32i32(<32 x i32>* %x) { +; CHECK-LABEL: vreduce_xor_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i32>, <32 x i32>* %x + %red = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.xor.v64i32(<64 x i32>) + +define i32 @vreduce_xor_v64i32(<64 x i32>* %x) { +; CHECK-LABEL: vreduce_xor_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vxor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredxor.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i32>, <64 x i32>* %x + %red = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>) + +define i64 @vreduce_xor_v1i64(<1 x i64>* %x) { +; RV32-LABEL: vreduce_xor_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_xor_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <1 x i64>, <1 x i64>* %x + %red = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %v) + ret i64 %red +} + 
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+
+define i64 @vreduce_xor_v2i64(<2 x i64>* %x) {
+; RV32-LABEL: vreduce_xor_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: vredxor.vs v25, v25, v26
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_xor_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v26, 0
+; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV64-NEXT: vredxor.vs v25, v25, v26
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <2 x i64>, <2 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+
+define i64 @vreduce_xor_v4i64(<4 x i64>* %x) {
+; RV32-LABEL: vreduce_xor_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; RV32-NEXT: vle64.v v26, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v25, 0
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: vredxor.vs v25, v26, v25
+; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_xor_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; RV64-NEXT: vle64.v v26, (a0)
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v25, 0
+; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV64-NEXT: vredxor.vs v25, v26, v25
+; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <4 x i64>, <4 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
+
+define i64 @vreduce_xor_v8i64(<8 x i64>* %x) {
+; RV32-LABEL: vreduce_xor_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
+; RV32-NEXT: vle64.v v28, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v25, 0
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: vredxor.vs v25, v28, v25
+; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_xor_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu
+; RV64-NEXT: vle64.v v28, (a0)
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v25, 0
+; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV64-NEXT: vredxor.vs v25, v28, v25
+; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <8 x i64>, <8 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %v)
+ ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)
+
+define i64 @vreduce_xor_v16i64(<16 x i64>* %x) {
+; RV32-LABEL: vreduce_xor_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vredxor.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_xor_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredxor.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i64>, <16 x i64>* %x + %red = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.xor.v32i64(<32 x i64>) + +define i64 @vreduce_xor_v32i64(<32 x i64>* %x) { +; RV32-LABEL: vreduce_xor_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vxor.vv v28, v8, v16 +; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v8, v28, 4 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vxor.vv v26, v28, v8 +; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 2 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vxor.vv v25, v26, v28 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslidedown.vi v26, v25, 1 +; RV32-NEXT: vxor.vv v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_xor_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredxor.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.xor.v64i64(<64 x i64>) + +define i64 @vreduce_xor_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_xor_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -160 +; RV32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 120 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: addi a2, a0, 384 +; 
RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 104(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 100(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 96(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 92(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 88(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 84(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 80(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 76(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 72(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 68(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 64(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 60(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 56(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; 
RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 52(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 48(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 44(a3) # 4-byte Folded Spill +; RV32-NEXT: addi a2, a0, 256 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v16, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 31 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 40(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 29 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 36(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 27 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 32(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 25 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 28(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 23 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 24(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 21 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 20(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 19 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 16(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 17 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 120 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 12(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: vmv.x.s s11, v24 +; RV32-NEXT: vslidedown.vi v24, v16, 13 +; RV32-NEXT: vmv.x.s s10, v24 +; RV32-NEXT: vslidedown.vi v8, v16, 11 +; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: 
vslidedown.vi v0, v16, 3 +; RV32-NEXT: vslidedown.vi v24, v16, 1 +; RV32-NEXT: vmv.x.s s9, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s8, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s7, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s6, v8 +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: vmv.x.s s4, v24 +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 128 +; RV32-NEXT: vle32.v v0, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v0, 31 +; RV32-NEXT: vslidedown.vi v24, v0, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 5 +; RV32-NEXT: csrr a2, vlenb 
+; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 3 +; RV32-NEXT: vs8r.v v24, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v0, 1 +; RV32-NEXT: vmv.x.s s3, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s0, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a6, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a5, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a4, v8 +; RV32-NEXT: vl8re8.v v8, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a3, v8 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v24, (a0) +; RV32-NEXT: vsetivli a0, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v24, 2 +; RV32-NEXT: vmv.x.s a0, v24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: 
vslidedown.vi v8, v24, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v0, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 20 +; 
RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi s1, zero, 112 +; RV32-NEXT: mul a1, a1, s1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 2 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 4 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 6 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 8 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 10 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 12 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 14 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 16 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 18 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 20 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 22 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 24 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 26 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 28 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v16, 30 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 3 +; RV32-NEXT: vslidedown.vi v16, v24, 1 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s ra, v16 +; RV32-NEXT: xor a1, ra, a1 +; RV32-NEXT: vslidedown.vi v8, v24, 5 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 7 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 9 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 11 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 13 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 15 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 17 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 19 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 21 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 23 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 25 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 27 +; RV32-NEXT: 
vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 29 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: vslidedown.vi v8, v24, 31 +; RV32-NEXT: vmv.x.s s1, v8 +; RV32-NEXT: xor a1, a1, s1 +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a1, a1, a4 +; RV32-NEXT: xor a1, a1, a5 +; RV32-NEXT: xor a1, a1, a6 +; RV32-NEXT: xor a1, a1, a7 +; RV32-NEXT: xor a1, a1, t0 +; RV32-NEXT: xor a1, a1, t1 +; RV32-NEXT: xor a1, a1, t2 +; RV32-NEXT: xor a1, a1, t3 +; RV32-NEXT: xor a1, a1, t4 +; RV32-NEXT: xor a1, a1, t5 +; RV32-NEXT: xor a1, a1, t6 +; RV32-NEXT: xor a1, a1, s0 +; RV32-NEXT: xor a1, a1, s2 +; RV32-NEXT: xor a1, a1, s3 +; RV32-NEXT: xor a1, a1, s4 +; RV32-NEXT: xor a1, a1, s5 +; RV32-NEXT: xor a1, a1, s6 +; RV32-NEXT: xor a1, a1, s7 +; RV32-NEXT: xor a1, a1, s8 +; RV32-NEXT: xor a1, a1, s9 +; RV32-NEXT: xor a1, a1, s10 +; RV32-NEXT: xor a1, a1, s11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 12(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 16(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 20(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 24(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 28(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 32(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 36(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 40(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 44(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 48(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 52(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 56(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 60(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: 
csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 64(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 68(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 72(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 76(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 80(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 84(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 88(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 92(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 96(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 100(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 104(a2) # 4-byte Folded Reload +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 160 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_xor_v64i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vxor.vv v16, v24, v16 +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli 
a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredxor.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i64>, <64 x i64>* %x + %red = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> %v) + ret i64 %red +} + +declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>) + +define i8 @vreduce_smin_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>) + +define i8 @vreduce_smin_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredmin.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>) + +define i8 @vreduce_smin_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredmin.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) + +define i8 @vreduce_smin_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredmin.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) + +define i8 @vreduce_smin_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredmin.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>) + +define i8 @vreduce_smin_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredmin.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s 
a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>) + +define i8 @vreduce_smin_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredmin.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>) + +define i8 @vreduce_smin_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smin.v256i8(<256 x i8>) + +define i8 @vreduce_smin_v256i8(<256 x i8>* %x) { +; CHECK-LABEL: vreduce_smin_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: addi a0, zero, 127 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredmin.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.smin.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.smin.v1i16(<1 x i16>) + +define i16 @vreduce_smin_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_smin_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>) + +define i16 @vreduce_smin_v2i16(<2 x i16>* %x) { +; RV32-LABEL: vreduce_smin_v2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; RV32-NEXT: vle16.v v25, (a0) +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; RV32-NEXT: vredmin.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; RV64-NEXT: vle16.v v25, (a0) +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; RV64-NEXT: vredmin.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, 
v25 +; RV64-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) + +define i16 @vreduce_smin_v4i16(<4 x i16>* %x) { +; RV32-LABEL: vreduce_smin_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vle16.v v25, (a0) +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; RV32-NEXT: vredmin.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vle16.v v25, (a0) +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; RV64-NEXT: vredmin.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) + +define i16 @vreduce_smin_v8i16(<8 x i16>* %x) { +; RV32-LABEL: vreduce_smin_v8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; RV32-NEXT: vle16.v v25, (a0) +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; RV32-NEXT: vredmin.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; RV64-NEXT: vle16.v v25, (a0) +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; RV64-NEXT: vredmin.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) + +define i16 @vreduce_smin_v16i16(<16 x i16>* %x) { +; RV32-LABEL: vreduce_smin_v16i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; RV32-NEXT: vle16.v v26, (a0) +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV32-NEXT: vredmin.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v16i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; RV64-NEXT: vle16.v v26, (a0) +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV64-NEXT: vredmin.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>) + +define i16 @vreduce_smin_v32i16(<32 x i16>* %x) { +; RV32-LABEL: vreduce_smin_v32i16: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; RV32-NEXT: vle16.v v28, (a0) +; RV32-NEXT: lui a0, 
8 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; RV32-NEXT: vredmin.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v32i16: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 32 +; RV64-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; RV64-NEXT: vle16.v v28, (a0) +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; RV64-NEXT: vredmin.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>) + +define i16 @vreduce_smin_v64i16(<64 x i16>* %x) { +; RV32-LABEL: vreduce_smin_v64i16: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 64 +; RV32-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; RV32-NEXT: vredmin.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v64i16: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 64 +; RV64-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; RV64-NEXT: vredmin.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smin.v128i16(<128 x i16>) + +define i16 @vreduce_smin_v128i16(<128 x i16>* %x) { +; RV32-LABEL: vreduce_smin_v128i16: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 64 +; RV32-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; RV32-NEXT: vredmin.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v128i16: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 64 +; RV64-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle16.v v16, (a0) +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; RV64-NEXT: vredmin.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <128 x i16>, <128 x i16>* %x + %red = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.smin.v1i32(<1 x i32>) + +define i32 @vreduce_smin_v1i32(<1 x i32>* %x) { +; CHECK-LABEL: 
vreduce_smin_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i32>, <1 x i32>* %x + %red = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) + +define i32 @vreduce_smin_v2i32(<2 x i32>* %x) { +; RV32-LABEL: vreduce_smin_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; RV32-NEXT: vle32.v v25, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV32-NEXT: vredmin.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; RV64-NEXT: vle32.v v25, (a0) +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV64-NEXT: vredmin.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <2 x i32>, <2 x i32>* %x + %red = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) + +define i32 @vreduce_smin_v4i32(<4 x i32>* %x) { +; RV32-LABEL: vreduce_smin_v4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; RV32-NEXT: vle32.v v25, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; RV32-NEXT: vredmin.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; RV64-NEXT: vle32.v v25, (a0) +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; RV64-NEXT: vredmin.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i32>, <4 x i32>* %x + %red = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) + +define i32 @vreduce_smin_v8i32(<8 x i32>* %x) { +; RV32-LABEL: vreduce_smin_v8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; RV32-NEXT: vle32.v v26, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV32-NEXT: vredmin.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; RV64-NEXT: vle32.v v26, (a0) +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV64-NEXT: vredmin.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i32>, <8 x i32>* %x + %red = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>) + +define i32 @vreduce_smin_v16i32(<16 x i32>* %x) { +; RV32-LABEL: 
vreduce_smin_v16i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; RV32-NEXT: vle32.v v28, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV32-NEXT: vredmin.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v16i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; RV64-NEXT: vle32.v v28, (a0) +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV64-NEXT: vredmin.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i32>, <16 x i32>* %x + %red = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>) + +define i32 @vreduce_smin_v32i32(<32 x i32>* %x) { +; RV32-LABEL: vreduce_smin_v32i32: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; RV32-NEXT: vredmin.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v32i32: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 32 +; RV64-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a2, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; RV64-NEXT: vredmin.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i32>, <32 x i32>* %x + %red = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smin.v64i32(<64 x i32>) + +define i32 @vreduce_smin_v64i32(<64 x i32>* %x) { +; RV32-LABEL: vreduce_smin_v64i32: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle32.v v16, (a0) +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; RV32-NEXT: vredmin.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v64i32: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 32 +; RV64-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle32.v v16, (a0) +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a2, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; RV64-NEXT: vredmin.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i32>, <64 x i32>* %x + %red = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> %v) + ret i32 %red +} + 
+declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>) + +define i64 @vreduce_smin_v1i64(<1 x i64>* %x) { +; RV32-LABEL: vreduce_smin_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <1 x i64>, <1 x i64>* %x + %red = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) + +define i64 @vreduce_smin_v2i64(<2 x i64>* %x) { +; RV32-LABEL: vreduce_smin_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v26, -1 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v26, v26, a1 +; RV32-NEXT: vsrl.vx v26, v26, a1 +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vmv.v.x v27, a0 +; RV32-NEXT: vsll.vx v27, v27, a1 +; RV32-NEXT: vor.vv v26, v26, v27 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vredmin.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV64-NEXT: vredmin.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <2 x i64>, <2 x i64>* %x + %red = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) + +define i64 @vreduce_smin_v4i64(<4 x i64>* %x) { +; RV32-LABEL: vreduce_smin_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v26, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v25, v25, a1 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vmv.v.x v28, a0 +; RV32-NEXT: vsll.vx v28, v28, a1 +; RV32-NEXT: vor.vv v25, v25, v28 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vredmin.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v26, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vredmin.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i64>, <4 x i64>* %x + %red = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>) + +define i64 
@vreduce_smin_v8i64(<8 x i64>* %x) { +; RV32-LABEL: vreduce_smin_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v25, v25, a1 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsll.vx v26, v26, a1 +; RV32-NEXT: vor.vv v25, v25, v26 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vredmin.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vredmin.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i64>, <8 x i64>* %x + %red = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>) + +define i64 @vreduce_smin_v16i64(<16 x i64>* %x) { +; RV32-LABEL: vreduce_smin_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v25, v25, a1 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsll.vx v26, v26, a1 +; RV32-NEXT: vor.vv v25, v25, v26 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vredmin.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmin.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i64>, <16 x i64>* %x + %red = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.v32i64(<32 x i64>) + +define i64 @vreduce_smin_v32i64(<32 x i64>* %x) { +; RV32-LABEL: vreduce_smin_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vmin.vv v28, v8, v16 +; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v8, v28, 4 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vmin.vv v26, v28, v8 +; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: 
vslidedown.vi v28, v26, 2 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vmin.vv v25, v26, v28 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslidedown.vi v26, v25, 1 +; RV32-NEXT: vmin.vv v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smin_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmin.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smin.v64i64(<64 x i64>) + +define i64 @vreduce_smin_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_smin_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -480 +; RV32-NEXT: sw ra, 476(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 472(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 468(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 464(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 460(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 456(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 452(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 448(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 444(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 440(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 436(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 432(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 428(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: addi a2, a0, 384 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 320(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 324(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 312(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 316(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 304(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: 
addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 308(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 296(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 300(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 288(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 292(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 280(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 284(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 272(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 276(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 264(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 268(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 256(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 260(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 248(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 252(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; 
RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 240(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 244(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 232(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 236(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 388(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 228(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 384(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 224(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 216(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 220(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi 
a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 212(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 424(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 256 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; 
RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 380(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 
+; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 208(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 200(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 204(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 196(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 420(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 376(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 192(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 184(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 188(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; 
RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 180(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 416(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 372(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 176(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 168(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 172(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 164(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 412(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 368(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 160(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, 
vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 364(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 156(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 148(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 152(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 144(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 408(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 360(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 140(a3) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 356(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 136(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 128(a3) # 4-byte Folded Spill +; 
RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 132(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 128 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: 
vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 124(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; 
RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 404(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 352(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 120(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 112(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 116(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s0, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 400(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 348(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 108(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 344(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # 
Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 104(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 340(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 100(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 336(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 84(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 92(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 96(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s9, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 396(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 332(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v 
v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 80(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 72(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 76(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 64(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 68(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 56(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 60(a3) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s7, v0 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 392(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 328(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 48(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli a0, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 224 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 216 +; 
RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 208 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 200 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 192 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 184 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 176 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 168 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 160 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 152 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 144 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 136 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 120 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 112 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 104 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 96 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 88 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) 
# Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 80 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 56 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 224 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 40(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 216 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 44(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 208 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 32(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 200 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 36(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 192 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 28(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 184 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 12(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 176 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 24(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 168 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 160 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 20(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 152 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a6, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 144 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 16(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 136 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 120 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 112 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 104 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: 
vmv.x.s s11, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 96 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s ra, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 88 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 80 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 56 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s6, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s10, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v0 +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a4, v0 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t6, zero, 24 +; RV32-NEXT: mul a2, a2, t6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v8 +; RV32-NEXT: mv s4, a0 +; RV32-NEXT: blt a1, a3, .LBB154_1 +; RV32-NEXT: j .LBB154_358 +; RV32-NEXT: .LBB154_1: +; RV32-NEXT: bltu a0, t6, .LBB154_2 +; RV32-NEXT: j .LBB154_359 +; RV32-NEXT: .LBB154_2: +; RV32-NEXT: beq a1, a3, .LBB154_3 +; RV32-NEXT: j .LBB154_360 +; RV32-NEXT: .LBB154_3: +; RV32-NEXT: blt a1, a3, .LBB154_4 +; RV32-NEXT: j .LBB154_361 +; RV32-NEXT: .LBB154_4: +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: blt a1, a4, .LBB154_5 +; RV32-NEXT: j .LBB154_362 +; RV32-NEXT: .LBB154_5: +; RV32-NEXT: bltu a0, t4, .LBB154_6 +; RV32-NEXT: j .LBB154_363 +; RV32-NEXT: .LBB154_6: +; RV32-NEXT: beq a1, a4, .LBB154_8 +; RV32-NEXT: .LBB154_7: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB154_8: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw s0, 88(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, 
sp, a2 +; RV32-NEXT: sw s9, 52(a2) # 4-byte Folded Spill +; RV32-NEXT: mv t4, s7 +; RV32-NEXT: blt a1, a4, .LBB154_9 +; RV32-NEXT: j .LBB154_364 +; RV32-NEXT: .LBB154_9: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, s10, .LBB154_10 +; RV32-NEXT: j .LBB154_365 +; RV32-NEXT: .LBB154_10: +; RV32-NEXT: bltu a0, a7, .LBB154_11 +; RV32-NEXT: j .LBB154_366 +; RV32-NEXT: .LBB154_11: +; RV32-NEXT: beq a1, s10, .LBB154_12 +; RV32-NEXT: j .LBB154_367 +; RV32-NEXT: .LBB154_12: +; RV32-NEXT: blt a1, s10, .LBB154_13 +; RV32-NEXT: j .LBB154_368 +; RV32-NEXT: .LBB154_13: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, s3, .LBB154_15 +; RV32-NEXT: .LBB154_14: +; RV32-NEXT: mv a4, s6 +; RV32-NEXT: .LBB154_15: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s10, 84(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s6, .LBB154_16 +; RV32-NEXT: j .LBB154_369 +; RV32-NEXT: .LBB154_16: +; RV32-NEXT: beq a1, s3, .LBB154_17 +; RV32-NEXT: j .LBB154_370 +; RV32-NEXT: .LBB154_17: +; RV32-NEXT: blt a1, s3, .LBB154_18 +; RV32-NEXT: j .LBB154_371 +; RV32-NEXT: .LBB154_18: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, a5, .LBB154_19 +; RV32-NEXT: j .LBB154_372 +; RV32-NEXT: .LBB154_19: +; RV32-NEXT: bltu a0, t5, .LBB154_20 +; RV32-NEXT: j .LBB154_373 +; RV32-NEXT: .LBB154_20: +; RV32-NEXT: beq a1, a5, .LBB154_21 +; RV32-NEXT: j .LBB154_374 +; RV32-NEXT: .LBB154_21: +; RV32-NEXT: blt a1, a5, .LBB154_22 +; RV32-NEXT: j .LBB154_375 +; RV32-NEXT: .LBB154_22: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, t1, .LBB154_23 +; RV32-NEXT: j .LBB154_376 +; RV32-NEXT: .LBB154_23: +; RV32-NEXT: bltu a0, s5, .LBB154_24 +; RV32-NEXT: j .LBB154_377 +; RV32-NEXT: .LBB154_24: +; RV32-NEXT: beq a1, t1, .LBB154_26 +; RV32-NEXT: .LBB154_25: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB154_26: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s5, 104(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, t1, .LBB154_28 +; RV32-NEXT: # %bb.27: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: .LBB154_28: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, s1, .LBB154_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: mv a4, ra +; RV32-NEXT: .LBB154_30: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t1, 100(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, ra, .LBB154_31 +; RV32-NEXT: j .LBB154_378 +; RV32-NEXT: .LBB154_31: +; RV32-NEXT: beq a1, s1, .LBB154_32 +; RV32-NEXT: j .LBB154_379 +; RV32-NEXT: .LBB154_32: +; RV32-NEXT: blt a1, s1, .LBB154_33 +; RV32-NEXT: j .LBB154_380 +; RV32-NEXT: .LBB154_33: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, s11, .LBB154_35 +; RV32-NEXT: .LBB154_34: +; RV32-NEXT: mv a4, s8 +; RV32-NEXT: .LBB154_35: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t5, 48(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s8, .LBB154_37 +; RV32-NEXT: # %bb.36: +; RV32-NEXT: mv a0, s8 +; RV32-NEXT: .LBB154_37: +; RV32-NEXT: beq a1, s11, .LBB154_39 +; RV32-NEXT: # %bb.38: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB154_39: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a7, 132(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, s11, .LBB154_41 +; RV32-NEXT: # %bb.40: +; RV32-NEXT: mv a1, s11 +; RV32-NEXT: .LBB154_41: +; RV32-NEXT: mv 
a4, a0 +; RV32-NEXT: blt a1, t3, .LBB154_43 +; RV32-NEXT: # %bb.42: +; RV32-NEXT: mv a4, s2 +; RV32-NEXT: .LBB154_43: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s3, 96(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s11, 92(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s2, .LBB154_44 +; RV32-NEXT: j .LBB154_381 +; RV32-NEXT: .LBB154_44: +; RV32-NEXT: beq a1, t3, .LBB154_45 +; RV32-NEXT: j .LBB154_382 +; RV32-NEXT: .LBB154_45: +; RV32-NEXT: blt a1, t3, .LBB154_47 +; RV32-NEXT: .LBB154_46: +; RV32-NEXT: mv a1, t3 +; RV32-NEXT: .LBB154_47: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s0, 16(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a1, t0, .LBB154_49 +; RV32-NEXT: # %bb.48: +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: .LBB154_49: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s1, 24(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 20(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s0, .LBB154_51 +; RV32-NEXT: # %bb.50: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: .LBB154_51: +; RV32-NEXT: beq a1, t0, .LBB154_53 +; RV32-NEXT: # %bb.52: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_53: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s8, 116(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, t0, .LBB154_55 +; RV32-NEXT: # %bb.54: +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: .LBB154_55: +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: blt a1, a6, .LBB154_57 +; RV32-NEXT: # %bb.56: +; RV32-NEXT: mv s0, a4 +; RV32-NEXT: .LBB154_57: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t0, 152(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s6, 80(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 28(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a4, .LBB154_58 +; RV32-NEXT: j .LBB154_383 +; RV32-NEXT: .LBB154_58: +; RV32-NEXT: beq a1, a6, .LBB154_59 +; RV32-NEXT: j .LBB154_384 +; RV32-NEXT: .LBB154_59: +; RV32-NEXT: blt a1, a6, .LBB154_60 +; RV32-NEXT: j .LBB154_385 +; RV32-NEXT: .LBB154_60: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, t2, .LBB154_62 +; RV32-NEXT: .LBB154_61: +; RV32-NEXT: mv a4, s1 +; RV32-NEXT: .LBB154_62: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 36(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a6, 32(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s1, .LBB154_63 +; RV32-NEXT: j .LBB154_386 +; RV32-NEXT: .LBB154_63: +; RV32-NEXT: beq a1, t2, .LBB154_64 +; RV32-NEXT: j .LBB154_387 +; RV32-NEXT: .LBB154_64: +; RV32-NEXT: blt a1, t2, .LBB154_66 +; RV32-NEXT: 
.LBB154_65: +; RV32-NEXT: mv a1, t2 +; RV32-NEXT: .LBB154_66: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t6, 108(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s1, 12(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a1, s1, .LBB154_67 +; RV32-NEXT: j .LBB154_388 +; RV32-NEXT: .LBB154_67: +; RV32-NEXT: bltu a0, a5, .LBB154_68 +; RV32-NEXT: j .LBB154_389 +; RV32-NEXT: .LBB154_68: +; RV32-NEXT: beq a1, s1, .LBB154_69 +; RV32-NEXT: j .LBB154_390 +; RV32-NEXT: .LBB154_69: +; RV32-NEXT: blt a1, s1, .LBB154_70 +; RV32-NEXT: j .LBB154_391 +; RV32-NEXT: .LBB154_70: +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: blt a1, s0, .LBB154_72 +; RV32-NEXT: .LBB154_71: +; RV32-NEXT: mv a5, a6 +; RV32-NEXT: .LBB154_72: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t3, 120(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 44(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 40(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a6, .LBB154_73 +; RV32-NEXT: j .LBB154_392 +; RV32-NEXT: .LBB154_73: +; RV32-NEXT: beq a1, s0, .LBB154_74 +; RV32-NEXT: j .LBB154_393 +; RV32-NEXT: .LBB154_74: +; RV32-NEXT: blt a1, s0, .LBB154_75 +; RV32-NEXT: j .LBB154_394 +; RV32-NEXT: .LBB154_75: +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: blt a1, s1, .LBB154_77 +; RV32-NEXT: .LBB154_76: +; RV32-NEXT: mv s0, a2 +; RV32-NEXT: .LBB154_77: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw ra, 172(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s2, 76(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s7, 72(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 68(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a6, 64(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 60(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 56(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, a2, .LBB154_395 +; RV32-NEXT: # %bb.78: +; RV32-NEXT: bne a1, s1, .LBB154_396 +; RV32-NEXT: .LBB154_79: +; RV32-NEXT: bge a1, s1, .LBB154_397 +; RV32-NEXT: .LBB154_80: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, t5, .LBB154_82 +; RV32-NEXT: .LBB154_81: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a2, a2, t2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 328(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_82: +; RV32-NEXT: csrr t2, 
vlenb +; RV32-NEXT: addi s0, zero, 232 +; RV32-NEXT: mul t2, t2, s0 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: lw t2, 208(t2) # 4-byte Folded Reload +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: addi s1, zero, 232 +; RV32-NEXT: mul s0, s0, s1 +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: lw s4, 140(s0) # 4-byte Folded Reload +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: addi s1, zero, 232 +; RV32-NEXT: mul s0, s0, s1 +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: lw s9, 328(s0) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s9, .LBB154_398 +; RV32-NEXT: # %bb.83: +; RV32-NEXT: bne a1, t5, .LBB154_399 +; RV32-NEXT: .LBB154_84: +; RV32-NEXT: blt a1, t5, .LBB154_86 +; RV32-NEXT: .LBB154_85: +; RV32-NEXT: mv a1, t5 +; RV32-NEXT: .LBB154_86: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: addi s0, zero, 232 +; RV32-NEXT: mul t5, t5, s0 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: lw s1, 392(t5) # 4-byte Folded Reload +; RV32-NEXT: blt a1, s1, .LBB154_88 +; RV32-NEXT: # %bb.87: +; RV32-NEXT: mv a2, t4 +; RV32-NEXT: .LBB154_88: +; RV32-NEXT: bltu a0, t4, .LBB154_90 +; RV32-NEXT: # %bb.89: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: .LBB154_90: +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: addi t5, zero, 232 +; RV32-NEXT: mul t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: lw t4, 392(t4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB154_92 +; RV32-NEXT: # %bb.91: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_92: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t4, zero, 232 +; RV32-NEXT: mul a2, a2, t4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 392(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_400 +; RV32-NEXT: # %bb.93: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a5, .LBB154_401 +; RV32-NEXT: .LBB154_94: +; RV32-NEXT: bgeu a0, a3, .LBB154_402 +; RV32-NEXT: .LBB154_95: +; RV32-NEXT: beq a1, a5, .LBB154_97 +; RV32-NEXT: .LBB154_96: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_97: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 52(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a5, .LBB154_403 +; RV32-NEXT: # %bb.98: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a4, .LBB154_404 +; RV32-NEXT: .LBB154_99: +; RV32-NEXT: bgeu a0, a6, .LBB154_405 +; RV32-NEXT: .LBB154_100: +; RV32-NEXT: beq a1, a4, .LBB154_102 +; RV32-NEXT: .LBB154_101: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_102: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 136(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a4, .LBB154_406 +; RV32-NEXT: # %bb.103: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s2, .LBB154_407 +; RV32-NEXT: .LBB154_104: +; RV32-NEXT: bgeu a0, s7, .LBB154_408 +; RV32-NEXT: .LBB154_105: +; RV32-NEXT: bne a1, s2, .LBB154_409 +; RV32-NEXT: .LBB154_106: +; RV32-NEXT: bge a1, s2, .LBB154_410 +; RV32-NEXT: .LBB154_107: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s6, .LBB154_109 +; RV32-NEXT: .LBB154_108: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 332(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_109: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s7, 332(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s7, .LBB154_411 +; RV32-NEXT: # %bb.110: +; RV32-NEXT: bne a1, s6, .LBB154_412 +; RV32-NEXT: .LBB154_111: +; 
RV32-NEXT: blt a1, s6, .LBB154_113 +; RV32-NEXT: .LBB154_112: +; RV32-NEXT: mv a1, s6 +; RV32-NEXT: .LBB154_113: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a5, 396(a4) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a5, .LBB154_115 +; RV32-NEXT: # %bb.114: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB154_115: +; RV32-NEXT: bltu a0, a3, .LBB154_117 +; RV32-NEXT: # %bb.116: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB154_117: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t4, 396(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 88(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB154_119 +; RV32-NEXT: # %bb.118: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_119: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 396(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 112(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_413 +; RV32-NEXT: # %bb.120: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s3, .LBB154_414 +; RV32-NEXT: .LBB154_121: +; RV32-NEXT: bgeu a0, s11, .LBB154_415 +; RV32-NEXT: .LBB154_122: +; RV32-NEXT: bne a1, s3, .LBB154_416 +; RV32-NEXT: .LBB154_123: +; RV32-NEXT: bge a1, s3, .LBB154_417 +; RV32-NEXT: .LBB154_124: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s10, .LBB154_126 +; RV32-NEXT: .LBB154_125: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 336(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_126: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s11, 336(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s11, .LBB154_418 +; RV32-NEXT: # %bb.127: +; RV32-NEXT: bne a1, s10, .LBB154_419 +; RV32-NEXT: .LBB154_128: +; RV32-NEXT: bge a1, s10, .LBB154_420 +; RV32-NEXT: .LBB154_129: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, t1, .LBB154_131 +; RV32-NEXT: .LBB154_130: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 340(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_131: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s10, 340(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s10, .LBB154_421 +; RV32-NEXT: # %bb.132: +; RV32-NEXT: bne a1, t1, .LBB154_422 +; RV32-NEXT: .LBB154_133: +; RV32-NEXT: bge a1, t1, .LBB154_423 +; RV32-NEXT: .LBB154_134: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s5, .LBB154_136 +; RV32-NEXT: .LBB154_135: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 344(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_136: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t1, 344(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, t1, .LBB154_424 +; RV32-NEXT: # %bb.137: +; RV32-NEXT: bne a1, s5, 
.LBB154_425 +; RV32-NEXT: .LBB154_138: +; RV32-NEXT: mv t1, t2 +; RV32-NEXT: bge a1, s5, .LBB154_426 +; RV32-NEXT: .LBB154_139: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, t6, .LBB154_141 +; RV32-NEXT: .LBB154_140: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 348(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_141: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s5, 348(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s5, .LBB154_427 +; RV32-NEXT: # %bb.142: +; RV32-NEXT: bne a1, t6, .LBB154_428 +; RV32-NEXT: .LBB154_143: +; RV32-NEXT: blt a1, t6, .LBB154_145 +; RV32-NEXT: .LBB154_144: +; RV32-NEXT: mv a1, t6 +; RV32-NEXT: .LBB154_145: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 400(a4) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a4, .LBB154_147 +; RV32-NEXT: # %bb.146: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: .LBB154_147: +; RV32-NEXT: bltu a0, a5, .LBB154_149 +; RV32-NEXT: # %bb.148: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: .LBB154_149: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t4, 400(a4) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 124(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB154_151 +; RV32-NEXT: # %bb.150: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_151: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 400(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 128(a5) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_429 +; RV32-NEXT: # %bb.152: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s8, .LBB154_430 +; RV32-NEXT: .LBB154_153: +; RV32-NEXT: bgeu a0, a3, .LBB154_431 +; RV32-NEXT: .LBB154_154: +; RV32-NEXT: bne a1, s8, .LBB154_432 +; RV32-NEXT: .LBB154_155: +; RV32-NEXT: bge a1, s8, .LBB154_433 +; RV32-NEXT: .LBB154_156: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, t3, .LBB154_158 +; RV32-NEXT: .LBB154_157: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 352(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_158: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s9, 352(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s9, .LBB154_434 +; RV32-NEXT: # %bb.159: +; RV32-NEXT: bne a1, t3, .LBB154_435 +; RV32-NEXT: .LBB154_160: +; RV32-NEXT: blt a1, t3, .LBB154_162 +; RV32-NEXT: .LBB154_161: +; RV32-NEXT: mv a1, t3 +; RV32-NEXT: .LBB154_162: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 404(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a3, .LBB154_164 +; RV32-NEXT: # %bb.163: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB154_164: +; RV32-NEXT: bltu a0, a4, .LBB154_166 +; RV32-NEXT: # %bb.165: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB154_166: +; RV32-NEXT: 
csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s10, 404(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 164(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s10, .LBB154_168 +; RV32-NEXT: # %bb.167: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_168: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 404(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 144(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_436 +; RV32-NEXT: # %bb.169: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a7, .LBB154_437 +; RV32-NEXT: .LBB154_170: +; RV32-NEXT: bgeu a0, a5, .LBB154_438 +; RV32-NEXT: .LBB154_171: +; RV32-NEXT: beq a1, a7, .LBB154_173 +; RV32-NEXT: .LBB154_172: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_173: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 156(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a7, .LBB154_175 +; RV32-NEXT: # %bb.174: +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: .LBB154_175: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s1, .LBB154_177 +; RV32-NEXT: # %bb.176: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 356(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_177: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s8, 356(a6) # 4-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s2, 196(a6) # 4-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw a7, 220(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s8, .LBB154_439 +; RV32-NEXT: # %bb.178: +; RV32-NEXT: bne a1, s1, .LBB154_440 +; RV32-NEXT: .LBB154_179: +; RV32-NEXT: bge a1, s1, .LBB154_441 +; RV32-NEXT: .LBB154_180: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s4, .LBB154_182 +; RV32-NEXT: .LBB154_181: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 360(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_182: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw t5, 360(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, t5, .LBB154_442 +; RV32-NEXT: # %bb.183: +; RV32-NEXT: bne a1, s4, .LBB154_443 +; RV32-NEXT: .LBB154_184: +; RV32-NEXT: blt a1, s4, .LBB154_186 +; RV32-NEXT: .LBB154_185: +; RV32-NEXT: mv a1, s4 +; RV32-NEXT: .LBB154_186: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s0, 408(a6) # 4-byte Folded Reload +; RV32-NEXT: blt a1, s0, .LBB154_188 +; RV32-NEXT: # %bb.187: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB154_188: +; RV32-NEXT: bltu a0, a4, .LBB154_190 +; RV32-NEXT: # %bb.189: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB154_190: +; RV32-NEXT: csrr 
a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s11, 408(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s11, .LBB154_192 +; RV32-NEXT: # %bb.191: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_192: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 408(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 148(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_444 +; RV32-NEXT: # %bb.193: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, t0, .LBB154_445 +; RV32-NEXT: .LBB154_194: +; RV32-NEXT: bgeu a0, a4, .LBB154_446 +; RV32-NEXT: .LBB154_195: +; RV32-NEXT: beq a1, t0, .LBB154_197 +; RV32-NEXT: .LBB154_196: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_197: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 168(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, t0, .LBB154_199 +; RV32-NEXT: # %bb.198: +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: .LBB154_199: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a5, .LBB154_201 +; RV32-NEXT: # %bb.200: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 364(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_201: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw t0, 364(a6) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, t0, .LBB154_203 +; RV32-NEXT: # %bb.202: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: .LBB154_203: +; RV32-NEXT: beq a1, a5, .LBB154_205 +; RV32-NEXT: # %bb.204: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_205: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t0, 200(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 184(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 204(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a5, .LBB154_207 +; RV32-NEXT: # %bb.206: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB154_207: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 160(a5) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a5, .LBB154_209 +; RV32-NEXT: # %bb.208: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 368(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_209: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s4, 368(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s4, .LBB154_447 +; RV32-NEXT: # %bb.210: +; RV32-NEXT: bne a1, a5, .LBB154_448 +; RV32-NEXT: .LBB154_211: +; RV32-NEXT: blt a1, a5, .LBB154_213 +; RV32-NEXT: .LBB154_212: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB154_213: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: 
add a5, sp, a5 +; RV32-NEXT: lw a5, 412(a5) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a5, .LBB154_215 +; RV32-NEXT: # %bb.214: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB154_215: +; RV32-NEXT: bltu a0, a3, .LBB154_217 +; RV32-NEXT: # %bb.216: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB154_217: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t5, 412(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 216(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t5, .LBB154_219 +; RV32-NEXT: # %bb.218: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_219: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 412(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 180(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_449 +; RV32-NEXT: # %bb.220: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, ra, .LBB154_450 +; RV32-NEXT: .LBB154_221: +; RV32-NEXT: bgeu a0, a4, .LBB154_451 +; RV32-NEXT: .LBB154_222: +; RV32-NEXT: beq a1, ra, .LBB154_224 +; RV32-NEXT: .LBB154_223: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_224: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 176(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, ra, .LBB154_226 +; RV32-NEXT: # %bb.225: +; RV32-NEXT: mv a1, ra +; RV32-NEXT: .LBB154_226: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a4, .LBB154_228 +; RV32-NEXT: # %bb.227: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 372(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_228: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s7, 372(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s7, .LBB154_452 +; RV32-NEXT: # %bb.229: +; RV32-NEXT: bne a1, a4, .LBB154_453 +; RV32-NEXT: .LBB154_230: +; RV32-NEXT: blt a1, a4, .LBB154_232 +; RV32-NEXT: .LBB154_231: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB154_232: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 416(a4) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a4, .LBB154_234 +; RV32-NEXT: # %bb.233: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB154_234: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a6, 244(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a3, .LBB154_236 +; RV32-NEXT: # %bb.235: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB154_236: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s9, 416(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t5, 264(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 212(a3) # 4-byte Folded Reload +; RV32-NEXT: beq 
a1, s9, .LBB154_238 +; RV32-NEXT: # %bb.237: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_238: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 416(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 188(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_454 +; RV32-NEXT: # %bb.239: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a3, .LBB154_455 +; RV32-NEXT: .LBB154_240: +; RV32-NEXT: bgeu a0, s0, .LBB154_456 +; RV32-NEXT: .LBB154_241: +; RV32-NEXT: bne a1, a3, .LBB154_457 +; RV32-NEXT: .LBB154_242: +; RV32-NEXT: blt a1, a3, .LBB154_244 +; RV32-NEXT: .LBB154_243: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB154_244: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 192(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a3, .LBB154_246 +; RV32-NEXT: # %bb.245: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a2, a2, t2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 376(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_246: +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: addi t3, zero, 232 +; RV32-NEXT: mul t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: lw s11, 376(t2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s11, .LBB154_458 +; RV32-NEXT: # %bb.247: +; RV32-NEXT: bne a1, a3, .LBB154_459 +; RV32-NEXT: .LBB154_248: +; RV32-NEXT: blt a1, a3, .LBB154_250 +; RV32-NEXT: .LBB154_249: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB154_250: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 420(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a3, .LBB154_252 +; RV32-NEXT: # %bb.251: +; RV32-NEXT: mv a2, s2 +; RV32-NEXT: .LBB154_252: +; RV32-NEXT: bltu a0, s2, .LBB154_254 +; RV32-NEXT: # %bb.253: +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: .LBB154_254: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t2, 420(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t3, zero, 232 +; RV32-NEXT: mul a3, a3, t3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t4, 232(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t2, .LBB154_256 +; RV32-NEXT: # %bb.255: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_256: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 420(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a2, .LBB154_258 +; RV32-NEXT: # %bb.257: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB154_258: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 228(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a1, s1, .LBB154_460 +; RV32-NEXT: # %bb.259: +; RV32-NEXT: bgeu a0, t0, .LBB154_461 +; RV32-NEXT: .LBB154_260: +; RV32-NEXT: bne a1, s1, .LBB154_462 +; RV32-NEXT: .LBB154_261: +; RV32-NEXT: bge a1, s1, .LBB154_463 +; RV32-NEXT: .LBB154_262: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, t1, .LBB154_264 +; RV32-NEXT: .LBB154_263: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a2, a2, t0 +; RV32-NEXT: add a2, sp, a2 +; 
RV32-NEXT: lw a2, 380(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_264: +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul t0, t0, t2 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: lw t0, 380(t0) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, t0, .LBB154_464 +; RV32-NEXT: # %bb.265: +; RV32-NEXT: bne a1, t1, .LBB154_465 +; RV32-NEXT: .LBB154_266: +; RV32-NEXT: blt a1, t1, .LBB154_268 +; RV32-NEXT: .LBB154_267: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: .LBB154_268: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: addi t1, zero, 232 +; RV32-NEXT: mul t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: lw s0, 424(t0) # 4-byte Folded Reload +; RV32-NEXT: blt a1, s0, .LBB154_270 +; RV32-NEXT: # %bb.269: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB154_270: +; RV32-NEXT: bltu a0, a4, .LBB154_272 +; RV32-NEXT: # %bb.271: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB154_272: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a4, a4, t0 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t3, 424(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t3, .LBB154_274 +; RV32-NEXT: # %bb.273: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_274: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 424(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a4, a4, t0 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 224(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a2, .LBB154_466 +; RV32-NEXT: # %bb.275: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a7, .LBB154_467 +; RV32-NEXT: .LBB154_276: +; RV32-NEXT: bgeu a0, a5, .LBB154_468 +; RV32-NEXT: .LBB154_277: +; RV32-NEXT: bne a1, a7, .LBB154_469 +; RV32-NEXT: .LBB154_278: +; RV32-NEXT: bge a1, a7, .LBB154_470 +; RV32-NEXT: .LBB154_279: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a4, .LBB154_281 +; RV32-NEXT: .LBB154_280: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 384(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_281: +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a5, a5, a7 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw t1, 384(a5) # 4-byte Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a5, a5, a7 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw t0, 312(a5) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, t1, .LBB154_283 +; RV32-NEXT: # %bb.282: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: .LBB154_283: +; RV32-NEXT: beq a1, a4, .LBB154_285 +; RV32-NEXT: # %bb.284: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_285: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 236(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a4, .LBB154_287 +; RV32-NEXT: # %bb.286: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB154_287: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a3, .LBB154_289 +; RV32-NEXT: # %bb.288: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 388(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB154_289: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a7, 388(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a7, .LBB154_291 +; RV32-NEXT: 
# %bb.290: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: .LBB154_291: +; RV32-NEXT: beq a1, a3, .LBB154_293 +; RV32-NEXT: # %bb.292: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_293: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t1, 256(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 260(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a7, 272(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 252(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 248(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a3, .LBB154_471 +; RV32-NEXT: # %bb.294: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s1, .LBB154_472 +; RV32-NEXT: .LBB154_295: +; RV32-NEXT: bgeu a0, t4, .LBB154_473 +; RV32-NEXT: .LBB154_296: +; RV32-NEXT: beq a1, s1, .LBB154_298 +; RV32-NEXT: .LBB154_297: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_298: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 240(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a1, s1, .LBB154_474 +; RV32-NEXT: # %bb.299: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a6, .LBB154_475 +; RV32-NEXT: .LBB154_300: +; RV32-NEXT: bgeu a0, a3, .LBB154_476 +; RV32-NEXT: .LBB154_301: +; RV32-NEXT: bne a1, a6, .LBB154_477 +; RV32-NEXT: .LBB154_302: +; RV32-NEXT: bge a1, a6, .LBB154_478 +; RV32-NEXT: .LBB154_303: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a4, .LBB154_305 +; RV32-NEXT: .LBB154_304: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: .LBB154_305: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a6, 304(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 284(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a5, .LBB154_307 +; RV32-NEXT: # %bb.306: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: .LBB154_307: +; RV32-NEXT: beq a1, a4, .LBB154_309 +; RV32-NEXT: # %bb.308: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_309: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 268(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a4, .LBB154_479 +; RV32-NEXT: # %bb.310: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s0, .LBB154_480 +; RV32-NEXT: .LBB154_311: +; RV32-NEXT: bgeu a0, t1, .LBB154_481 +; RV32-NEXT: .LBB154_312: +; RV32-NEXT: bne a1, s0, .LBB154_482 +; RV32-NEXT: .LBB154_313: +; RV32-NEXT: bge a1, s0, .LBB154_483 +; RV32-NEXT: .LBB154_314: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a5, .LBB154_484 +; RV32-NEXT: .LBB154_315: +; RV32-NEXT: bgeu a0, t5, .LBB154_485 +; RV32-NEXT: .LBB154_316: +; RV32-NEXT: beq a1, a5, .LBB154_318 +; RV32-NEXT: .LBB154_317: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_318: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 276(a2) # 4-byte 
Folded Reload +; RV32-NEXT: bge a1, a5, .LBB154_486 +; RV32-NEXT: # %bb.319: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a4, .LBB154_487 +; RV32-NEXT: .LBB154_320: +; RV32-NEXT: bgeu a0, a7, .LBB154_488 +; RV32-NEXT: .LBB154_321: +; RV32-NEXT: beq a1, a4, .LBB154_323 +; RV32-NEXT: .LBB154_322: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_323: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 296(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 292(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a4, .LBB154_325 +; RV32-NEXT: # %bb.324: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB154_325: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a4, a4, a7 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 280(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a3, .LBB154_489 +; RV32-NEXT: # %bb.326: +; RV32-NEXT: bgeu a0, a4, .LBB154_490 +; RV32-NEXT: .LBB154_327: +; RV32-NEXT: bne a1, a3, .LBB154_491 +; RV32-NEXT: .LBB154_328: +; RV32-NEXT: blt a1, a3, .LBB154_330 +; RV32-NEXT: .LBB154_329: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB154_330: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 288(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a5, .LBB154_332 +; RV32-NEXT: # %bb.331: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB154_332: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a3, a3, a7 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s0, 316(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a4, .LBB154_334 +; RV32-NEXT: # %bb.333: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB154_334: +; RV32-NEXT: beq a1, a5, .LBB154_336 +; RV32-NEXT: # %bb.335: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_336: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 320(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a1, a5, .LBB154_338 +; RV32-NEXT: # %bb.337: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB154_338: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 300(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a5, .LBB154_492 +; RV32-NEXT: # %bb.339: +; RV32-NEXT: bgeu a0, s1, .LBB154_493 +; RV32-NEXT: .LBB154_340: +; RV32-NEXT: bne a1, a5, .LBB154_494 +; RV32-NEXT: .LBB154_341: +; RV32-NEXT: blt a1, a5, .LBB154_343 +; RV32-NEXT: .LBB154_342: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB154_343: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 308(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a5, .LBB154_495 +; RV32-NEXT: # %bb.344: +; RV32-NEXT: bgeu a0, a6, .LBB154_496 +; RV32-NEXT: .LBB154_345: +; RV32-NEXT: beq a1, a5, .LBB154_347 +; RV32-NEXT: .LBB154_346: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB154_347: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 324(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a1, a5, .LBB154_497 +; RV32-NEXT: # %bb.348: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s0, .LBB154_498 
+; RV32-NEXT: .LBB154_349: +; RV32-NEXT: bgeu a0, t0, .LBB154_499 +; RV32-NEXT: .LBB154_350: +; RV32-NEXT: bne a1, s0, .LBB154_500 +; RV32-NEXT: .LBB154_351: +; RV32-NEXT: bge a1, s0, .LBB154_501 +; RV32-NEXT: .LBB154_352: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a3, .LBB154_502 +; RV32-NEXT: .LBB154_353: +; RV32-NEXT: bgeu a0, a4, .LBB154_503 +; RV32-NEXT: .LBB154_354: +; RV32-NEXT: bne a1, a3, .LBB154_504 +; RV32-NEXT: .LBB154_355: +; RV32-NEXT: blt a1, a3, .LBB154_357 +; RV32-NEXT: .LBB154_356: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB154_357: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 428(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 432(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 436(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 440(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 444(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 448(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 452(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 456(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 460(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 464(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 468(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 472(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 476(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 480 +; RV32-NEXT: ret +; RV32-NEXT: .LBB154_358: +; RV32-NEXT: mv s4, t6 +; RV32-NEXT: bgeu a0, t6, .LBB154_359 +; RV32-NEXT: j .LBB154_2 +; RV32-NEXT: .LBB154_359: +; RV32-NEXT: mv a0, t6 +; RV32-NEXT: bne a1, a3, .LBB154_360 +; RV32-NEXT: j .LBB154_3 +; RV32-NEXT: .LBB154_360: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: bge a1, a3, .LBB154_361 +; RV32-NEXT: j .LBB154_4 +; RV32-NEXT: .LBB154_361: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: bge a1, a4, .LBB154_362 +; RV32-NEXT: j .LBB154_5 +; RV32-NEXT: .LBB154_362: +; RV32-NEXT: mv a3, t4 +; RV32-NEXT: bgeu a0, t4, .LBB154_363 +; RV32-NEXT: j .LBB154_6 +; RV32-NEXT: .LBB154_363: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: beq a1, a4, .LBB154_505 +; RV32-NEXT: j .LBB154_7 +; RV32-NEXT: .LBB154_505: +; RV32-NEXT: j .LBB154_8 +; RV32-NEXT: .LBB154_364: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bge a1, s10, .LBB154_365 +; RV32-NEXT: j .LBB154_10 +; RV32-NEXT: .LBB154_365: +; RV32-NEXT: mv a4, a7 +; RV32-NEXT: bgeu a0, a7, .LBB154_366 +; RV32-NEXT: j .LBB154_11 +; RV32-NEXT: .LBB154_366: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, s10, .LBB154_367 +; RV32-NEXT: j .LBB154_12 +; RV32-NEXT: .LBB154_367: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge a1, s10, .LBB154_368 +; RV32-NEXT: j .LBB154_13 +; RV32-NEXT: .LBB154_368: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, s3, .LBB154_506 +; RV32-NEXT: j .LBB154_14 +; RV32-NEXT: .LBB154_506: +; RV32-NEXT: j .LBB154_15 +; RV32-NEXT: .LBB154_369: +; RV32-NEXT: mv a0, s6 +; RV32-NEXT: bne a1, s3, .LBB154_370 +; RV32-NEXT: j .LBB154_17 +; RV32-NEXT: .LBB154_370: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge a1, s3, .LBB154_371 +; RV32-NEXT: j .LBB154_18 +; RV32-NEXT: .LBB154_371: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bge a1, a5, .LBB154_372 +; RV32-NEXT: j .LBB154_19 +; RV32-NEXT: .LBB154_372: +; RV32-NEXT: mv a4, t5 +; RV32-NEXT: bgeu a0, t5, .LBB154_373 +; RV32-NEXT: j .LBB154_20 +; RV32-NEXT: .LBB154_373: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB154_374 +; RV32-NEXT: j .LBB154_21 +; RV32-NEXT: .LBB154_374: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge a1, a5, 
.LBB154_375 +; RV32-NEXT: j .LBB154_22 +; RV32-NEXT: .LBB154_375: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bge a1, t1, .LBB154_376 +; RV32-NEXT: j .LBB154_23 +; RV32-NEXT: .LBB154_376: +; RV32-NEXT: mv a4, s5 +; RV32-NEXT: bgeu a0, s5, .LBB154_377 +; RV32-NEXT: j .LBB154_24 +; RV32-NEXT: .LBB154_377: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t1, .LBB154_507 +; RV32-NEXT: j .LBB154_25 +; RV32-NEXT: .LBB154_507: +; RV32-NEXT: j .LBB154_26 +; RV32-NEXT: .LBB154_378: +; RV32-NEXT: mv a0, ra +; RV32-NEXT: bne a1, s1, .LBB154_379 +; RV32-NEXT: j .LBB154_32 +; RV32-NEXT: .LBB154_379: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge a1, s1, .LBB154_380 +; RV32-NEXT: j .LBB154_33 +; RV32-NEXT: .LBB154_380: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, s11, .LBB154_508 +; RV32-NEXT: j .LBB154_34 +; RV32-NEXT: .LBB154_508: +; RV32-NEXT: j .LBB154_35 +; RV32-NEXT: .LBB154_381: +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: bne a1, t3, .LBB154_382 +; RV32-NEXT: j .LBB154_45 +; RV32-NEXT: .LBB154_382: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: blt a1, t3, .LBB154_509 +; RV32-NEXT: j .LBB154_46 +; RV32-NEXT: .LBB154_509: +; RV32-NEXT: j .LBB154_47 +; RV32-NEXT: .LBB154_383: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, a6, .LBB154_384 +; RV32-NEXT: j .LBB154_59 +; RV32-NEXT: .LBB154_384: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: bge a1, a6, .LBB154_385 +; RV32-NEXT: j .LBB154_60 +; RV32-NEXT: .LBB154_385: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a1, t2, .LBB154_510 +; RV32-NEXT: j .LBB154_61 +; RV32-NEXT: .LBB154_510: +; RV32-NEXT: j .LBB154_62 +; RV32-NEXT: .LBB154_386: +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: bne a1, t2, .LBB154_387 +; RV32-NEXT: j .LBB154_64 +; RV32-NEXT: .LBB154_387: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: blt a1, t2, .LBB154_511 +; RV32-NEXT: j .LBB154_65 +; RV32-NEXT: .LBB154_511: +; RV32-NEXT: j .LBB154_66 +; RV32-NEXT: .LBB154_388: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bgeu a0, a5, .LBB154_389 +; RV32-NEXT: j .LBB154_68 +; RV32-NEXT: .LBB154_389: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bne a1, s1, .LBB154_390 +; RV32-NEXT: j .LBB154_69 +; RV32-NEXT: .LBB154_390: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, s1, .LBB154_391 +; RV32-NEXT: j .LBB154_70 +; RV32-NEXT: .LBB154_391: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: blt a1, s0, .LBB154_512 +; RV32-NEXT: j .LBB154_71 +; RV32-NEXT: .LBB154_512: +; RV32-NEXT: j .LBB154_72 +; RV32-NEXT: .LBB154_392: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, s0, .LBB154_393 +; RV32-NEXT: j .LBB154_74 +; RV32-NEXT: .LBB154_393: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bge a1, s0, .LBB154_394 +; RV32-NEXT: j .LBB154_75 +; RV32-NEXT: .LBB154_394: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: blt a1, s1, .LBB154_513 +; RV32-NEXT: j .LBB154_76 +; RV32-NEXT: .LBB154_513: +; RV32-NEXT: j .LBB154_77 +; RV32-NEXT: .LBB154_395: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beq a1, s1, .LBB154_79 +; RV32-NEXT: .LBB154_396: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: blt a1, s1, .LBB154_80 +; RV32-NEXT: .LBB154_397: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, t5, .LBB154_81 +; RV32-NEXT: j .LBB154_82 +; RV32-NEXT: .LBB154_398: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t5, .LBB154_84 +; RV32-NEXT: .LBB154_399: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, t5, .LBB154_85 +; RV32-NEXT: j .LBB154_86 +; RV32-NEXT: .LBB154_400: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a5, .LBB154_94 +; RV32-NEXT: .LBB154_401: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: 
bltu a0, a3, .LBB154_95 +; RV32-NEXT: .LBB154_402: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: bne a1, a5, .LBB154_96 +; RV32-NEXT: j .LBB154_97 +; RV32-NEXT: .LBB154_403: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a4, .LBB154_99 +; RV32-NEXT: .LBB154_404: +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: bltu a0, a6, .LBB154_100 +; RV32-NEXT: .LBB154_405: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, a4, .LBB154_101 +; RV32-NEXT: j .LBB154_102 +; RV32-NEXT: .LBB154_406: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s2, .LBB154_104 +; RV32-NEXT: .LBB154_407: +; RV32-NEXT: mv a2, s7 +; RV32-NEXT: bltu a0, s7, .LBB154_105 +; RV32-NEXT: .LBB154_408: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s2, .LBB154_106 +; RV32-NEXT: .LBB154_409: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s2, .LBB154_107 +; RV32-NEXT: .LBB154_410: +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s6, .LBB154_108 +; RV32-NEXT: j .LBB154_109 +; RV32-NEXT: .LBB154_411: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s6, .LBB154_111 +; RV32-NEXT: .LBB154_412: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, s6, .LBB154_112 +; RV32-NEXT: j .LBB154_113 +; RV32-NEXT: .LBB154_413: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s3, .LBB154_121 +; RV32-NEXT: .LBB154_414: +; RV32-NEXT: mv a2, s11 +; RV32-NEXT: bltu a0, s11, .LBB154_122 +; RV32-NEXT: .LBB154_415: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s3, .LBB154_123 +; RV32-NEXT: .LBB154_416: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s3, .LBB154_124 +; RV32-NEXT: .LBB154_417: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s10, .LBB154_125 +; RV32-NEXT: j .LBB154_126 +; RV32-NEXT: .LBB154_418: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s10, .LBB154_128 +; RV32-NEXT: .LBB154_419: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s10, .LBB154_129 +; RV32-NEXT: .LBB154_420: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, t1, .LBB154_130 +; RV32-NEXT: j .LBB154_131 +; RV32-NEXT: .LBB154_421: +; RV32-NEXT: mv a0, s10 +; RV32-NEXT: beq a1, t1, .LBB154_133 +; RV32-NEXT: .LBB154_422: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, t1, .LBB154_134 +; RV32-NEXT: .LBB154_423: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s5, .LBB154_135 +; RV32-NEXT: j .LBB154_136 +; RV32-NEXT: .LBB154_424: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s5, .LBB154_138 +; RV32-NEXT: .LBB154_425: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv t1, t2 +; RV32-NEXT: blt a1, s5, .LBB154_139 +; RV32-NEXT: .LBB154_426: +; RV32-NEXT: mv a1, s5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, t6, .LBB154_140 +; RV32-NEXT: j .LBB154_141 +; RV32-NEXT: .LBB154_427: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t6, .LBB154_143 +; RV32-NEXT: .LBB154_428: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, t6, .LBB154_144 +; RV32-NEXT: j .LBB154_145 +; RV32-NEXT: .LBB154_429: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s8, .LBB154_153 +; RV32-NEXT: .LBB154_430: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a0, a3, .LBB154_154 +; RV32-NEXT: .LBB154_431: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, s8, .LBB154_155 +; RV32-NEXT: .LBB154_432: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s8, .LBB154_156 +; RV32-NEXT: .LBB154_433: +; RV32-NEXT: mv a1, s8 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, t3, .LBB154_157 +; RV32-NEXT: j .LBB154_158 +; RV32-NEXT: .LBB154_434: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t3, .LBB154_160 +; RV32-NEXT: .LBB154_435: +; 
RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, t3, .LBB154_161 +; RV32-NEXT: j .LBB154_162 +; RV32-NEXT: .LBB154_436: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a7, .LBB154_170 +; RV32-NEXT: .LBB154_437: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a0, a5, .LBB154_171 +; RV32-NEXT: .LBB154_438: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bne a1, a7, .LBB154_172 +; RV32-NEXT: j .LBB154_173 +; RV32-NEXT: .LBB154_439: +; RV32-NEXT: mv a0, s8 +; RV32-NEXT: beq a1, s1, .LBB154_179 +; RV32-NEXT: .LBB154_440: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s1, .LBB154_180 +; RV32-NEXT: .LBB154_441: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, s4, .LBB154_181 +; RV32-NEXT: j .LBB154_182 +; RV32-NEXT: .LBB154_442: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: beq a1, s4, .LBB154_184 +; RV32-NEXT: .LBB154_443: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, s4, .LBB154_185 +; RV32-NEXT: j .LBB154_186 +; RV32-NEXT: .LBB154_444: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, t0, .LBB154_194 +; RV32-NEXT: .LBB154_445: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a0, a4, .LBB154_195 +; RV32-NEXT: .LBB154_446: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, t0, .LBB154_196 +; RV32-NEXT: j .LBB154_197 +; RV32-NEXT: .LBB154_447: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: beq a1, a5, .LBB154_211 +; RV32-NEXT: .LBB154_448: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, a5, .LBB154_212 +; RV32-NEXT: j .LBB154_213 +; RV32-NEXT: .LBB154_449: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, ra, .LBB154_221 +; RV32-NEXT: .LBB154_450: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a0, a4, .LBB154_222 +; RV32-NEXT: .LBB154_451: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, ra, .LBB154_223 +; RV32-NEXT: j .LBB154_224 +; RV32-NEXT: .LBB154_452: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, a4, .LBB154_230 +; RV32-NEXT: .LBB154_453: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, a4, .LBB154_231 +; RV32-NEXT: j .LBB154_232 +; RV32-NEXT: .LBB154_454: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a3, .LBB154_240 +; RV32-NEXT: .LBB154_455: +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: bltu a0, s0, .LBB154_241 +; RV32-NEXT: .LBB154_456: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: beq a1, a3, .LBB154_242 +; RV32-NEXT: .LBB154_457: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, a3, .LBB154_243 +; RV32-NEXT: j .LBB154_244 +; RV32-NEXT: .LBB154_458: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, a3, .LBB154_248 +; RV32-NEXT: .LBB154_459: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, a3, .LBB154_249 +; RV32-NEXT: j .LBB154_250 +; RV32-NEXT: .LBB154_460: +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: bltu a0, t0, .LBB154_260 +; RV32-NEXT: .LBB154_461: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, s1, .LBB154_261 +; RV32-NEXT: .LBB154_462: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s1, .LBB154_262 +; RV32-NEXT: .LBB154_463: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, t1, .LBB154_263 +; RV32-NEXT: j .LBB154_264 +; RV32-NEXT: .LBB154_464: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, t1, .LBB154_266 +; RV32-NEXT: .LBB154_465: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, t1, .LBB154_267 +; RV32-NEXT: j .LBB154_268 +; RV32-NEXT: .LBB154_466: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a7, .LBB154_276 +; RV32-NEXT: .LBB154_467: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a0, a5, .LBB154_277 +; RV32-NEXT: .LBB154_468: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: beq a1, a7, .LBB154_278 +; RV32-NEXT: .LBB154_469: +; RV32-NEXT: mv a0, 
a2 +; RV32-NEXT: blt a1, a7, .LBB154_279 +; RV32-NEXT: .LBB154_470: +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a4, .LBB154_280 +; RV32-NEXT: j .LBB154_281 +; RV32-NEXT: .LBB154_471: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s1, .LBB154_295 +; RV32-NEXT: .LBB154_472: +; RV32-NEXT: mv a2, t4 +; RV32-NEXT: bltu a0, t4, .LBB154_296 +; RV32-NEXT: .LBB154_473: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: bne a1, s1, .LBB154_297 +; RV32-NEXT: j .LBB154_298 +; RV32-NEXT: .LBB154_474: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a6, .LBB154_300 +; RV32-NEXT: .LBB154_475: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a0, a3, .LBB154_301 +; RV32-NEXT: .LBB154_476: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, a6, .LBB154_302 +; RV32-NEXT: .LBB154_477: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, a6, .LBB154_303 +; RV32-NEXT: .LBB154_478: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a1, a4, .LBB154_304 +; RV32-NEXT: j .LBB154_305 +; RV32-NEXT: .LBB154_479: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s0, .LBB154_311 +; RV32-NEXT: .LBB154_480: +; RV32-NEXT: mv a2, t1 +; RV32-NEXT: bltu a0, t1, .LBB154_312 +; RV32-NEXT: .LBB154_481: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s0, .LBB154_313 +; RV32-NEXT: .LBB154_482: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s0, .LBB154_314 +; RV32-NEXT: .LBB154_483: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a5, .LBB154_315 +; RV32-NEXT: .LBB154_484: +; RV32-NEXT: mv a2, t5 +; RV32-NEXT: bltu a0, t5, .LBB154_316 +; RV32-NEXT: .LBB154_485: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB154_317 +; RV32-NEXT: j .LBB154_318 +; RV32-NEXT: .LBB154_486: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a4, .LBB154_320 +; RV32-NEXT: .LBB154_487: +; RV32-NEXT: mv a2, a7 +; RV32-NEXT: bltu a0, a7, .LBB154_321 +; RV32-NEXT: .LBB154_488: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, a4, .LBB154_322 +; RV32-NEXT: j .LBB154_323 +; RV32-NEXT: .LBB154_489: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a0, a4, .LBB154_327 +; RV32-NEXT: .LBB154_490: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: beq a1, a3, .LBB154_328 +; RV32-NEXT: .LBB154_491: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, a3, .LBB154_329 +; RV32-NEXT: j .LBB154_330 +; RV32-NEXT: .LBB154_492: +; RV32-NEXT: mv a2, s1 +; RV32-NEXT: bltu a0, s1, .LBB154_340 +; RV32-NEXT: .LBB154_493: +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: beq a1, a5, .LBB154_341 +; RV32-NEXT: .LBB154_494: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, a5, .LBB154_342 +; RV32-NEXT: j .LBB154_343 +; RV32-NEXT: .LBB154_495: +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: bltu a0, a6, .LBB154_345 +; RV32-NEXT: .LBB154_496: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, a5, .LBB154_346 +; RV32-NEXT: j .LBB154_347 +; RV32-NEXT: .LBB154_497: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, s0, .LBB154_349 +; RV32-NEXT: .LBB154_498: +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: bltu a0, t0, .LBB154_350 +; RV32-NEXT: .LBB154_499: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, s0, .LBB154_351 +; RV32-NEXT: .LBB154_500: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a1, s0, .LBB154_352 +; RV32-NEXT: .LBB154_501: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a1, a3, .LBB154_353 +; RV32-NEXT: .LBB154_502: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a0, a4, .LBB154_354 +; RV32-NEXT: .LBB154_503: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: beq a1, a3, .LBB154_355 +; RV32-NEXT: .LBB154_504: +; 
RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a1, a3, .LBB154_356 +; RV32-NEXT: j .LBB154_357 +; +; RV64-LABEL: vreduce_smin_v64i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vmin.vv v16, v24, v16 +; RV64-NEXT: vmin.vv v8, v8, v0 +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmin.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i64>, <64 x i64>* %x + %red = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> %v) + ret i64 %red +} + +declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>) + +define i8 @vreduce_smax_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>) + +define i8 @vreduce_smax_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>) + +define i8 @vreduce_smax_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) + +define i8 @vreduce_smax_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) + +define i8 @vreduce_smax_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, 
v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) + +define i8 @vreduce_smax_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>) + +define i8 @vreduce_smax_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) + +define i8 @vreduce_smax_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.smax.v256i8(<256 x i8>) + +define i8 @vreduce_smax_v256i8(<256 x i8>* %x) { +; CHECK-LABEL: vreduce_smax_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: addi a0, zero, -128 +; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.smax.v1i16(<1 x i16>) + +define i16 @vreduce_smax_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>) + +define i16 @vreduce_smax_v2i16(<2 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v2i16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) + +define i16 @vreduce_smax_v4i16(<4 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) + +define i16 @vreduce_smax_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) + +define i16 @vreduce_smax_v16i16(<16 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>) + +define i16 @vreduce_smax_v32i16(<32 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>) + +define i16 @vreduce_smax_v64i16(<64 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; 
CHECK-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.smax.v128i16(<128 x i16>) + +define i16 @vreduce_smax_v128i16(<128 x i16>* %x) { +; CHECK-LABEL: vreduce_smax_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a2, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i16>, <128 x i16>* %x + %red = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.smax.v1i32(<1 x i32>) + +define i32 @vreduce_smax_v1i32(<1 x i32>* %x) { +; CHECK-LABEL: vreduce_smax_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i32>, <1 x i32>* %x + %red = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) + +define i32 @vreduce_smax_v2i32(<2 x i32>* %x) { +; CHECK-LABEL: vreduce_smax_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i32>, <2 x i32>* %x + %red = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) + +define i32 @vreduce_smax_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: vreduce_smax_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vredmax.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i32>, <4 x i32>* %x + %red = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) + +define i32 @vreduce_smax_v8i32(<8 x i32>* %x) { +; CHECK-LABEL: vreduce_smax_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vredmax.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i32>, <8 x i32>* %x + %red = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) + +define i32 @vreduce_smax_v16i32(<16 x i32>* %x) { +; CHECK-LABEL: vreduce_smax_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; 
CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vredmax.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i32>, <16 x i32>* %x + %red = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>) + +define i32 @vreduce_smax_v32i32(<32 x i32>* %x) { +; CHECK-LABEL: vreduce_smax_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a2, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i32>, <32 x i32>* %x + %red = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.smax.v64i32(<64 x i32>) + +define i32 @vreduce_smax_v64i32(<64 x i32>* %x) { +; CHECK-LABEL: vreduce_smax_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a2, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredmax.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i32>, <64 x i32>* %x + %red = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>) + +define i64 @vreduce_smax_v1i64(<1 x i64>* %x) { +; RV32-LABEL: vreduce_smax_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smax_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <1 x i64>, <1 x i64>* %x + %red = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) + +define i64 @vreduce_smax_v2i64(<2 x i64>* %x) { +; RV32-LABEL: vreduce_smax_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v26, v26, a1 +; RV32-NEXT: vmv.v.i v27, 0 +; RV32-NEXT: vsll.vx v27, v27, a1 +; RV32-NEXT: vsrl.vx v27, v27, a1 +; RV32-NEXT: vor.vv v26, v27, v26 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vredmax.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smax_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; 
RV64-NEXT: vredmax.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <2 x i64>, <2 x i64>* %x + %red = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) + +define i64 @vreduce_smax_v4i64(<4 x i64>* %x) { +; RV32-LABEL: vreduce_smax_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v26, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v25, v25, a1 +; RV32-NEXT: vmv.v.i v28, 0 +; RV32-NEXT: vsll.vx v28, v28, a1 +; RV32-NEXT: vsrl.vx v28, v28, a1 +; RV32-NEXT: vor.vv v25, v28, v25 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vredmax.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smax_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v26, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vredmax.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i64>, <4 x i64>* %x + %red = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>) + +define i64 @vreduce_smax_v8i64(<8 x i64>* %x) { +; RV32-LABEL: vreduce_smax_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v25, v25, a1 +; RV32-NEXT: vmv.v.i v26, 0 +; RV32-NEXT: vsll.vx v26, v26, a1 +; RV32-NEXT: vsrl.vx v26, v26, a1 +; RV32-NEXT: vor.vv v25, v26, v25 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vredmax.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smax_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vredmax.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i64>, <8 x i64>* %x + %red = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>) + +define i64 @vreduce_smax_v16i64(<16 x i64>* %x) { +; RV32-LABEL: vreduce_smax_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsll.vx v25, v25, a1 +; RV32-NEXT: vmv.v.i v26, 0 +; RV32-NEXT: vsll.vx v26, v26, a1 +; RV32-NEXT: vsrl.vx v26, v26, a1 +; RV32-NEXT: vor.vv v25, v26, v25 +; RV32-NEXT: vsetivli a0, 
16, e64,m8,ta,mu +; RV32-NEXT: vredmax.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smax_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmax.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i64>, <16 x i64>* %x + %red = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.v32i64(<32 x i64>) + +define i64 @vreduce_smax_v32i64(<32 x i64>* %x) { +; RV32-LABEL: vreduce_smax_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vmax.vv v8, v8, v16 +; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vmax.vv v28, v8, v16 +; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v8, v28, 4 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vmax.vv v26, v28, v8 +; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 2 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vmax.vv v25, v26, v28 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslidedown.vi v26, v25, 1 +; RV32-NEXT: vmax.vv v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_smax_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vmax.vv v8, v8, v16 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmax.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.smax.v64i64(<64 x i64>) + +define i64 @vreduce_smax_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_smax_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -480 +; RV32-NEXT: sw ra, 476(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 472(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 468(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 464(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 460(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 456(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 452(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 448(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 444(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 440(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 436(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 432(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 428(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: 
sub sp, sp, a1 +; RV32-NEXT: addi a2, a0, 384 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 320(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 324(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 312(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 316(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 304(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 308(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 296(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 300(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 288(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 292(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 280(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 284(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 272(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 276(a3) # 
4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 264(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 268(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 256(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 260(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 248(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 252(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 240(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 244(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 232(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 236(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 388(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 228(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size 
Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 384(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 224(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 216(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 220(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 212(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 424(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 256 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 
25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; 
RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 380(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 208(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 200(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 204(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 196(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 420(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, 
zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 376(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 192(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 184(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 188(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 180(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 416(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 372(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 176(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 168(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 172(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; 
RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 164(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 412(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 368(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 160(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 364(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 156(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 148(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 152(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 144(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr 
a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 408(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 360(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 140(a3) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 356(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 136(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 128(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 132(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 128 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; 
RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a2, vlenb 
+; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 124(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 404(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 352(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 120(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 112(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 116(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s0, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded 
Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 400(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 348(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 108(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 344(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 104(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 340(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 100(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 336(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 84(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; 
RV32-NEXT: sw a2, 92(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 96(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s9, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 396(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 332(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 80(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 72(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 76(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 64(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 68(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 56(a3) # 
4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 60(a3) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s7, v0 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 392(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 328(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 48(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli a0, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 224 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 216 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 208 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 200 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 192 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 184 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 176 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 168 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 160 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 152 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 144 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, 
(a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 136 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 120 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 112 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 104 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 96 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 88 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 80 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 56 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 
24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 1 +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 224 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 40(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 216 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 44(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 208 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 32(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 200 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 36(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 192 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 28(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 184 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 12(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 176 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 24(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 168 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 160 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 20(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 152 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: 
add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a6, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 144 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 16(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 136 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 120 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 112 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 104 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s11, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 96 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s ra, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 88 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 80 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 56 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s6, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s10, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 
3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v0 +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a4, v0 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv.x.s a1, v24 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t6, zero, 24 +; RV32-NEXT: mul a2, a2, t6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v8 +; RV32-NEXT: mv s4, a0 +; RV32-NEXT: blt a3, a1, .LBB185_1 +; RV32-NEXT: j .LBB185_358 +; RV32-NEXT: .LBB185_1: +; RV32-NEXT: bltu t6, a0, .LBB185_2 +; RV32-NEXT: j .LBB185_359 +; RV32-NEXT: .LBB185_2: +; RV32-NEXT: beq a1, a3, .LBB185_3 +; RV32-NEXT: j .LBB185_360 +; RV32-NEXT: .LBB185_3: +; RV32-NEXT: blt a3, a1, .LBB185_4 +; RV32-NEXT: j .LBB185_361 +; RV32-NEXT: .LBB185_4: +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: blt a4, a1, .LBB185_5 +; RV32-NEXT: j .LBB185_362 +; RV32-NEXT: .LBB185_5: +; RV32-NEXT: bltu t4, a0, .LBB185_6 +; RV32-NEXT: j .LBB185_363 +; RV32-NEXT: .LBB185_6: +; RV32-NEXT: beq a1, a4, .LBB185_8 +; RV32-NEXT: .LBB185_7: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB185_8: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw s0, 88(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw s9, 52(a2) # 4-byte Folded Spill +; RV32-NEXT: mv t4, s7 +; RV32-NEXT: blt a4, a1, .LBB185_9 +; RV32-NEXT: j .LBB185_364 +; RV32-NEXT: .LBB185_9: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt s10, a1, .LBB185_10 +; RV32-NEXT: j .LBB185_365 +; RV32-NEXT: .LBB185_10: +; RV32-NEXT: bltu a7, a0, .LBB185_11 +; RV32-NEXT: j .LBB185_366 +; RV32-NEXT: .LBB185_11: +; RV32-NEXT: beq a1, s10, .LBB185_12 +; RV32-NEXT: j .LBB185_367 +; RV32-NEXT: .LBB185_12: +; RV32-NEXT: blt s10, a1, .LBB185_13 +; RV32-NEXT: j .LBB185_368 +; RV32-NEXT: .LBB185_13: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt s3, a1, .LBB185_15 +; RV32-NEXT: .LBB185_14: +; RV32-NEXT: mv a4, s6 +; RV32-NEXT: .LBB185_15: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s10, 84(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu s6, a0, .LBB185_16 +; RV32-NEXT: j .LBB185_369 +; RV32-NEXT: .LBB185_16: +; RV32-NEXT: beq a1, s3, .LBB185_17 +; RV32-NEXT: j .LBB185_370 +; RV32-NEXT: .LBB185_17: +; RV32-NEXT: blt s3, a1, .LBB185_18 +; RV32-NEXT: j .LBB185_371 +; RV32-NEXT: .LBB185_18: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt a5, a1, .LBB185_19 +; RV32-NEXT: j .LBB185_372 +; RV32-NEXT: .LBB185_19: +; RV32-NEXT: bltu t5, a0, .LBB185_20 +; RV32-NEXT: j .LBB185_373 +; RV32-NEXT: .LBB185_20: +; RV32-NEXT: beq a1, a5, .LBB185_21 +; RV32-NEXT: j .LBB185_374 +; RV32-NEXT: .LBB185_21: +; RV32-NEXT: blt a5, a1, .LBB185_22 +; RV32-NEXT: j .LBB185_375 +; RV32-NEXT: .LBB185_22: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt t1, a1, .LBB185_23 +; RV32-NEXT: j .LBB185_376 +; RV32-NEXT: .LBB185_23: +; RV32-NEXT: bltu s5, a0, .LBB185_24 +; RV32-NEXT: j .LBB185_377 +; RV32-NEXT: .LBB185_24: +; RV32-NEXT: beq a1, t1, .LBB185_26 +; RV32-NEXT: .LBB185_25: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB185_26: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s5, 104(a2) # 4-byte Folded Reload +; RV32-NEXT: blt t1, a1, .LBB185_28 +; 
RV32-NEXT: # %bb.27: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: .LBB185_28: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt s1, a1, .LBB185_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: mv a4, ra +; RV32-NEXT: .LBB185_30: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t1, 100(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu ra, a0, .LBB185_31 +; RV32-NEXT: j .LBB185_378 +; RV32-NEXT: .LBB185_31: +; RV32-NEXT: beq a1, s1, .LBB185_32 +; RV32-NEXT: j .LBB185_379 +; RV32-NEXT: .LBB185_32: +; RV32-NEXT: blt s1, a1, .LBB185_33 +; RV32-NEXT: j .LBB185_380 +; RV32-NEXT: .LBB185_33: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt s11, a1, .LBB185_35 +; RV32-NEXT: .LBB185_34: +; RV32-NEXT: mv a4, s8 +; RV32-NEXT: .LBB185_35: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t5, 48(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu s8, a0, .LBB185_37 +; RV32-NEXT: # %bb.36: +; RV32-NEXT: mv a0, s8 +; RV32-NEXT: .LBB185_37: +; RV32-NEXT: beq a1, s11, .LBB185_39 +; RV32-NEXT: # %bb.38: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB185_39: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a7, 132(a2) # 4-byte Folded Reload +; RV32-NEXT: blt s11, a1, .LBB185_41 +; RV32-NEXT: # %bb.40: +; RV32-NEXT: mv a1, s11 +; RV32-NEXT: .LBB185_41: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt t3, a1, .LBB185_43 +; RV32-NEXT: # %bb.42: +; RV32-NEXT: mv a4, s2 +; RV32-NEXT: .LBB185_43: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s3, 96(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s11, 92(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu s2, a0, .LBB185_44 +; RV32-NEXT: j .LBB185_381 +; RV32-NEXT: .LBB185_44: +; RV32-NEXT: beq a1, t3, .LBB185_45 +; RV32-NEXT: j .LBB185_382 +; RV32-NEXT: .LBB185_45: +; RV32-NEXT: blt t3, a1, .LBB185_47 +; RV32-NEXT: .LBB185_46: +; RV32-NEXT: mv a1, t3 +; RV32-NEXT: .LBB185_47: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s0, 16(a3) # 4-byte Folded Reload +; RV32-NEXT: blt t0, a1, .LBB185_49 +; RV32-NEXT: # %bb.48: +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: .LBB185_49: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s1, 24(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 20(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu s0, a0, .LBB185_51 +; RV32-NEXT: # %bb.50: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: .LBB185_51: +; RV32-NEXT: beq a1, t0, .LBB185_53 +; RV32-NEXT: # %bb.52: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_53: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s8, 116(a2) # 4-byte Folded Reload +; RV32-NEXT: blt t0, a1, .LBB185_55 +; RV32-NEXT: # %bb.54: +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: .LBB185_55: +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: blt a6, a1, .LBB185_57 +; RV32-NEXT: # %bb.56: +; RV32-NEXT: mv s0, a4 +; RV32-NEXT: .LBB185_57: +; RV32-NEXT: csrr 
a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t0, 152(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s6, 80(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 28(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a4, a0, .LBB185_58 +; RV32-NEXT: j .LBB185_383 +; RV32-NEXT: .LBB185_58: +; RV32-NEXT: beq a1, a6, .LBB185_59 +; RV32-NEXT: j .LBB185_384 +; RV32-NEXT: .LBB185_59: +; RV32-NEXT: blt a6, a1, .LBB185_60 +; RV32-NEXT: j .LBB185_385 +; RV32-NEXT: .LBB185_60: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt t2, a1, .LBB185_62 +; RV32-NEXT: .LBB185_61: +; RV32-NEXT: mv a4, s1 +; RV32-NEXT: .LBB185_62: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 36(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a6, 32(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu s1, a0, .LBB185_63 +; RV32-NEXT: j .LBB185_386 +; RV32-NEXT: .LBB185_63: +; RV32-NEXT: beq a1, t2, .LBB185_64 +; RV32-NEXT: j .LBB185_387 +; RV32-NEXT: .LBB185_64: +; RV32-NEXT: blt t2, a1, .LBB185_66 +; RV32-NEXT: .LBB185_65: +; RV32-NEXT: mv a1, t2 +; RV32-NEXT: .LBB185_66: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t6, 108(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s1, 12(a3) # 4-byte Folded Reload +; RV32-NEXT: blt s1, a1, .LBB185_67 +; RV32-NEXT: j .LBB185_388 +; RV32-NEXT: .LBB185_67: +; RV32-NEXT: bltu a5, a0, .LBB185_68 +; RV32-NEXT: j .LBB185_389 +; RV32-NEXT: .LBB185_68: +; RV32-NEXT: beq a1, s1, .LBB185_69 +; RV32-NEXT: j .LBB185_390 +; RV32-NEXT: .LBB185_69: +; RV32-NEXT: blt s1, a1, .LBB185_70 +; RV32-NEXT: j .LBB185_391 +; RV32-NEXT: .LBB185_70: +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: blt s0, a1, .LBB185_72 +; RV32-NEXT: .LBB185_71: +; RV32-NEXT: mv a5, a6 +; RV32-NEXT: .LBB185_72: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t3, 120(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 44(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 40(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a6, a0, .LBB185_73 +; RV32-NEXT: j .LBB185_392 +; RV32-NEXT: .LBB185_73: +; RV32-NEXT: beq a1, s0, .LBB185_74 +; RV32-NEXT: j .LBB185_393 +; RV32-NEXT: .LBB185_74: +; RV32-NEXT: blt s0, a1, .LBB185_75 +; RV32-NEXT: j .LBB185_394 +; RV32-NEXT: .LBB185_75: +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: blt s1, a1, .LBB185_77 +; RV32-NEXT: .LBB185_76: +; RV32-NEXT: mv s0, a2 +; RV32-NEXT: .LBB185_77: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw ra, 172(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: 
addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s2, 76(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s7, 72(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 68(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a6, 64(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 60(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 56(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a2, a0, .LBB185_395 +; RV32-NEXT: # %bb.78: +; RV32-NEXT: bne a1, s1, .LBB185_396 +; RV32-NEXT: .LBB185_79: +; RV32-NEXT: bge s1, a1, .LBB185_397 +; RV32-NEXT: .LBB185_80: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt t5, a1, .LBB185_82 +; RV32-NEXT: .LBB185_81: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a2, a2, t2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 328(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_82: +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: addi s0, zero, 232 +; RV32-NEXT: mul t2, t2, s0 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: lw t2, 208(t2) # 4-byte Folded Reload +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: addi s1, zero, 232 +; RV32-NEXT: mul s0, s0, s1 +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: lw s4, 140(s0) # 4-byte Folded Reload +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: addi s1, zero, 232 +; RV32-NEXT: mul s0, s0, s1 +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: lw s9, 328(s0) # 4-byte Folded Reload +; RV32-NEXT: bgeu s9, a0, .LBB185_398 +; RV32-NEXT: # %bb.83: +; RV32-NEXT: bne a1, t5, .LBB185_399 +; RV32-NEXT: .LBB185_84: +; RV32-NEXT: blt t5, a1, .LBB185_86 +; RV32-NEXT: .LBB185_85: +; RV32-NEXT: mv a1, t5 +; RV32-NEXT: .LBB185_86: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: addi s0, zero, 232 +; RV32-NEXT: mul t5, t5, s0 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: lw s1, 392(t5) # 4-byte Folded Reload +; RV32-NEXT: blt s1, a1, .LBB185_88 +; RV32-NEXT: # %bb.87: +; RV32-NEXT: mv a2, t4 +; RV32-NEXT: .LBB185_88: +; RV32-NEXT: bltu t4, a0, .LBB185_90 +; RV32-NEXT: # %bb.89: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: .LBB185_90: +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: addi t5, zero, 232 +; RV32-NEXT: mul t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: lw t4, 392(t4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB185_92 +; RV32-NEXT: # %bb.91: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_92: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t4, zero, 232 +; RV32-NEXT: mul a2, a2, t4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 392(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a2, a1, .LBB185_400 +; RV32-NEXT: # %bb.93: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a5, a1, .LBB185_401 +; RV32-NEXT: .LBB185_94: +; RV32-NEXT: bgeu a3, a0, .LBB185_402 +; RV32-NEXT: .LBB185_95: +; RV32-NEXT: beq a1, a5, .LBB185_97 +; RV32-NEXT: .LBB185_96: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_97: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw 
a3, 52(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a5, a1, .LBB185_403 +; RV32-NEXT: # %bb.98: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a4, a1, .LBB185_404 +; RV32-NEXT: .LBB185_99: +; RV32-NEXT: bgeu a6, a0, .LBB185_405 +; RV32-NEXT: .LBB185_100: +; RV32-NEXT: beq a1, a4, .LBB185_102 +; RV32-NEXT: .LBB185_101: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_102: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 136(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a4, a1, .LBB185_406 +; RV32-NEXT: # %bb.103: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s2, a1, .LBB185_407 +; RV32-NEXT: .LBB185_104: +; RV32-NEXT: bgeu s7, a0, .LBB185_408 +; RV32-NEXT: .LBB185_105: +; RV32-NEXT: bne a1, s2, .LBB185_409 +; RV32-NEXT: .LBB185_106: +; RV32-NEXT: bge s2, a1, .LBB185_410 +; RV32-NEXT: .LBB185_107: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s6, a1, .LBB185_109 +; RV32-NEXT: .LBB185_108: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 332(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_109: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s7, 332(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu s7, a0, .LBB185_411 +; RV32-NEXT: # %bb.110: +; RV32-NEXT: bne a1, s6, .LBB185_412 +; RV32-NEXT: .LBB185_111: +; RV32-NEXT: blt s6, a1, .LBB185_113 +; RV32-NEXT: .LBB185_112: +; RV32-NEXT: mv a1, s6 +; RV32-NEXT: .LBB185_113: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a5, 396(a4) # 4-byte Folded Reload +; RV32-NEXT: blt a5, a1, .LBB185_115 +; RV32-NEXT: # %bb.114: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB185_115: +; RV32-NEXT: bltu a3, a0, .LBB185_117 +; RV32-NEXT: # %bb.116: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB185_117: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t4, 396(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 88(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB185_119 +; RV32-NEXT: # %bb.118: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_119: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 396(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 112(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a2, a1, .LBB185_413 +; RV32-NEXT: # %bb.120: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s3, a1, .LBB185_414 +; RV32-NEXT: .LBB185_121: +; RV32-NEXT: bgeu s11, a0, .LBB185_415 +; RV32-NEXT: .LBB185_122: +; RV32-NEXT: bne a1, s3, .LBB185_416 +; RV32-NEXT: .LBB185_123: +; RV32-NEXT: bge s3, a1, .LBB185_417 +; RV32-NEXT: .LBB185_124: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s10, a1, .LBB185_126 +; RV32-NEXT: .LBB185_125: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 336(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_126: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; 
RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s11, 336(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu s11, a0, .LBB185_418 +; RV32-NEXT: # %bb.127: +; RV32-NEXT: bne a1, s10, .LBB185_419 +; RV32-NEXT: .LBB185_128: +; RV32-NEXT: bge s10, a1, .LBB185_420 +; RV32-NEXT: .LBB185_129: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt t1, a1, .LBB185_131 +; RV32-NEXT: .LBB185_130: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 340(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_131: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s10, 340(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu s10, a0, .LBB185_421 +; RV32-NEXT: # %bb.132: +; RV32-NEXT: bne a1, t1, .LBB185_422 +; RV32-NEXT: .LBB185_133: +; RV32-NEXT: bge t1, a1, .LBB185_423 +; RV32-NEXT: .LBB185_134: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s5, a1, .LBB185_136 +; RV32-NEXT: .LBB185_135: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 344(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_136: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t1, 344(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu t1, a0, .LBB185_424 +; RV32-NEXT: # %bb.137: +; RV32-NEXT: bne a1, s5, .LBB185_425 +; RV32-NEXT: .LBB185_138: +; RV32-NEXT: mv t1, t2 +; RV32-NEXT: bge s5, a1, .LBB185_426 +; RV32-NEXT: .LBB185_139: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt t6, a1, .LBB185_141 +; RV32-NEXT: .LBB185_140: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 348(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_141: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s5, 348(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu s5, a0, .LBB185_427 +; RV32-NEXT: # %bb.142: +; RV32-NEXT: bne a1, t6, .LBB185_428 +; RV32-NEXT: .LBB185_143: +; RV32-NEXT: blt t6, a1, .LBB185_145 +; RV32-NEXT: .LBB185_144: +; RV32-NEXT: mv a1, t6 +; RV32-NEXT: .LBB185_145: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 400(a4) # 4-byte Folded Reload +; RV32-NEXT: blt a4, a1, .LBB185_147 +; RV32-NEXT: # %bb.146: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: .LBB185_147: +; RV32-NEXT: bltu a5, a0, .LBB185_149 +; RV32-NEXT: # %bb.148: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: .LBB185_149: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t4, 400(a4) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 124(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB185_151 +; RV32-NEXT: # %bb.150: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_151: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 400(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 128(a5) # 4-byte Folded Reload +; 
RV32-NEXT: bge a2, a1, .LBB185_429 +; RV32-NEXT: # %bb.152: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s8, a1, .LBB185_430 +; RV32-NEXT: .LBB185_153: +; RV32-NEXT: bgeu a3, a0, .LBB185_431 +; RV32-NEXT: .LBB185_154: +; RV32-NEXT: bne a1, s8, .LBB185_432 +; RV32-NEXT: .LBB185_155: +; RV32-NEXT: bge s8, a1, .LBB185_433 +; RV32-NEXT: .LBB185_156: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt t3, a1, .LBB185_158 +; RV32-NEXT: .LBB185_157: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 352(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_158: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s9, 352(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu s9, a0, .LBB185_434 +; RV32-NEXT: # %bb.159: +; RV32-NEXT: bne a1, t3, .LBB185_435 +; RV32-NEXT: .LBB185_160: +; RV32-NEXT: blt t3, a1, .LBB185_162 +; RV32-NEXT: .LBB185_161: +; RV32-NEXT: mv a1, t3 +; RV32-NEXT: .LBB185_162: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 404(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a3, a1, .LBB185_164 +; RV32-NEXT: # %bb.163: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB185_164: +; RV32-NEXT: bltu a4, a0, .LBB185_166 +; RV32-NEXT: # %bb.165: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB185_166: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s10, 404(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 164(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s10, .LBB185_168 +; RV32-NEXT: # %bb.167: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_168: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 404(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 144(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a2, a1, .LBB185_436 +; RV32-NEXT: # %bb.169: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a7, a1, .LBB185_437 +; RV32-NEXT: .LBB185_170: +; RV32-NEXT: bgeu a5, a0, .LBB185_438 +; RV32-NEXT: .LBB185_171: +; RV32-NEXT: beq a1, a7, .LBB185_173 +; RV32-NEXT: .LBB185_172: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_173: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 156(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a7, a1, .LBB185_175 +; RV32-NEXT: # %bb.174: +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: .LBB185_175: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s1, a1, .LBB185_177 +; RV32-NEXT: # %bb.176: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 356(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_177: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s8, 356(a6) # 4-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s2, 196(a6) # 4-byte Folded Reload +; RV32-NEXT: csrr a6, 
vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw a7, 220(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu s8, a0, .LBB185_439 +; RV32-NEXT: # %bb.178: +; RV32-NEXT: bne a1, s1, .LBB185_440 +; RV32-NEXT: .LBB185_179: +; RV32-NEXT: bge s1, a1, .LBB185_441 +; RV32-NEXT: .LBB185_180: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s4, a1, .LBB185_182 +; RV32-NEXT: .LBB185_181: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 360(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_182: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw t5, 360(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu t5, a0, .LBB185_442 +; RV32-NEXT: # %bb.183: +; RV32-NEXT: bne a1, s4, .LBB185_443 +; RV32-NEXT: .LBB185_184: +; RV32-NEXT: blt s4, a1, .LBB185_186 +; RV32-NEXT: .LBB185_185: +; RV32-NEXT: mv a1, s4 +; RV32-NEXT: .LBB185_186: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s0, 408(a6) # 4-byte Folded Reload +; RV32-NEXT: blt s0, a1, .LBB185_188 +; RV32-NEXT: # %bb.187: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB185_188: +; RV32-NEXT: bltu a4, a0, .LBB185_190 +; RV32-NEXT: # %bb.189: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB185_190: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s11, 408(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s11, .LBB185_192 +; RV32-NEXT: # %bb.191: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_192: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 408(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 148(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a2, a1, .LBB185_444 +; RV32-NEXT: # %bb.193: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge t0, a1, .LBB185_445 +; RV32-NEXT: .LBB185_194: +; RV32-NEXT: bgeu a4, a0, .LBB185_446 +; RV32-NEXT: .LBB185_195: +; RV32-NEXT: beq a1, t0, .LBB185_197 +; RV32-NEXT: .LBB185_196: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_197: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 168(a2) # 4-byte Folded Reload +; RV32-NEXT: blt t0, a1, .LBB185_199 +; RV32-NEXT: # %bb.198: +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: .LBB185_199: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a5, a1, .LBB185_201 +; RV32-NEXT: # %bb.200: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 364(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_201: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw t0, 364(a6) # 4-byte Folded Reload +; RV32-NEXT: bltu t0, a0, .LBB185_203 +; RV32-NEXT: # %bb.202: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: .LBB185_203: +; RV32-NEXT: beq a1, a5, .LBB185_205 +; RV32-NEXT: # %bb.204: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_205: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t0, 
200(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 184(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 204(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a5, a1, .LBB185_207 +; RV32-NEXT: # %bb.206: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB185_207: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 160(a5) # 4-byte Folded Reload +; RV32-NEXT: blt a5, a1, .LBB185_209 +; RV32-NEXT: # %bb.208: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 368(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_209: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s4, 368(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu s4, a0, .LBB185_447 +; RV32-NEXT: # %bb.210: +; RV32-NEXT: bne a1, a5, .LBB185_448 +; RV32-NEXT: .LBB185_211: +; RV32-NEXT: blt a5, a1, .LBB185_213 +; RV32-NEXT: .LBB185_212: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB185_213: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 412(a5) # 4-byte Folded Reload +; RV32-NEXT: blt a5, a1, .LBB185_215 +; RV32-NEXT: # %bb.214: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB185_215: +; RV32-NEXT: bltu a3, a0, .LBB185_217 +; RV32-NEXT: # %bb.216: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB185_217: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t5, 412(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 216(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t5, .LBB185_219 +; RV32-NEXT: # %bb.218: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_219: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 412(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 180(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a2, a1, .LBB185_449 +; RV32-NEXT: # %bb.220: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge ra, a1, .LBB185_450 +; RV32-NEXT: .LBB185_221: +; RV32-NEXT: bgeu a4, a0, .LBB185_451 +; RV32-NEXT: .LBB185_222: +; RV32-NEXT: beq a1, ra, .LBB185_224 +; RV32-NEXT: .LBB185_223: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_224: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 176(a2) # 4-byte Folded Reload +; RV32-NEXT: blt ra, a1, .LBB185_226 +; RV32-NEXT: # %bb.225: +; RV32-NEXT: mv a1, ra +; RV32-NEXT: .LBB185_226: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a4, a1, .LBB185_228 +; RV32-NEXT: # %bb.227: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 372(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_228: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 
232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s7, 372(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu s7, a0, .LBB185_452 +; RV32-NEXT: # %bb.229: +; RV32-NEXT: bne a1, a4, .LBB185_453 +; RV32-NEXT: .LBB185_230: +; RV32-NEXT: blt a4, a1, .LBB185_232 +; RV32-NEXT: .LBB185_231: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB185_232: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 416(a4) # 4-byte Folded Reload +; RV32-NEXT: blt a4, a1, .LBB185_234 +; RV32-NEXT: # %bb.233: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB185_234: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a6, 244(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a3, a0, .LBB185_236 +; RV32-NEXT: # %bb.235: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB185_236: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s9, 416(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t5, 264(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 212(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s9, .LBB185_238 +; RV32-NEXT: # %bb.237: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_238: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 416(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 188(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a2, a1, .LBB185_454 +; RV32-NEXT: # %bb.239: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a3, a1, .LBB185_455 +; RV32-NEXT: .LBB185_240: +; RV32-NEXT: bgeu s0, a0, .LBB185_456 +; RV32-NEXT: .LBB185_241: +; RV32-NEXT: bne a1, a3, .LBB185_457 +; RV32-NEXT: .LBB185_242: +; RV32-NEXT: blt a3, a1, .LBB185_244 +; RV32-NEXT: .LBB185_243: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB185_244: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 192(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a3, a1, .LBB185_246 +; RV32-NEXT: # %bb.245: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a2, a2, t2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 376(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_246: +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: addi t3, zero, 232 +; RV32-NEXT: mul t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: lw s11, 376(t2) # 4-byte Folded Reload +; RV32-NEXT: bgeu s11, a0, .LBB185_458 +; RV32-NEXT: # %bb.247: +; RV32-NEXT: bne a1, a3, .LBB185_459 +; RV32-NEXT: .LBB185_248: +; RV32-NEXT: blt a3, a1, .LBB185_250 +; RV32-NEXT: .LBB185_249: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB185_250: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 420(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a3, a1, .LBB185_252 +; RV32-NEXT: # %bb.251: +; RV32-NEXT: mv a2, s2 +; RV32-NEXT: .LBB185_252: +; RV32-NEXT: bltu s2, a0, .LBB185_254 
+; RV32-NEXT: # %bb.253: +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: .LBB185_254: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t2, 420(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t3, zero, 232 +; RV32-NEXT: mul a3, a3, t3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t4, 232(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t2, .LBB185_256 +; RV32-NEXT: # %bb.255: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_256: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 420(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a2, a1, .LBB185_258 +; RV32-NEXT: # %bb.257: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB185_258: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 228(a3) # 4-byte Folded Reload +; RV32-NEXT: bge s1, a1, .LBB185_460 +; RV32-NEXT: # %bb.259: +; RV32-NEXT: bgeu t0, a0, .LBB185_461 +; RV32-NEXT: .LBB185_260: +; RV32-NEXT: bne a1, s1, .LBB185_462 +; RV32-NEXT: .LBB185_261: +; RV32-NEXT: bge s1, a1, .LBB185_463 +; RV32-NEXT: .LBB185_262: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt t1, a1, .LBB185_264 +; RV32-NEXT: .LBB185_263: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a2, a2, t0 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 380(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_264: +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul t0, t0, t2 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: lw t0, 380(t0) # 4-byte Folded Reload +; RV32-NEXT: bgeu t0, a0, .LBB185_464 +; RV32-NEXT: # %bb.265: +; RV32-NEXT: bne a1, t1, .LBB185_465 +; RV32-NEXT: .LBB185_266: +; RV32-NEXT: blt t1, a1, .LBB185_268 +; RV32-NEXT: .LBB185_267: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: .LBB185_268: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: addi t1, zero, 232 +; RV32-NEXT: mul t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: lw s0, 424(t0) # 4-byte Folded Reload +; RV32-NEXT: blt s0, a1, .LBB185_270 +; RV32-NEXT: # %bb.269: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB185_270: +; RV32-NEXT: bltu a4, a0, .LBB185_272 +; RV32-NEXT: # %bb.271: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB185_272: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a4, a4, t0 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t3, 424(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t3, .LBB185_274 +; RV32-NEXT: # %bb.273: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_274: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 424(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a4, a4, t0 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 224(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a2, a1, .LBB185_466 +; RV32-NEXT: # %bb.275: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a7, a1, .LBB185_467 +; RV32-NEXT: .LBB185_276: +; RV32-NEXT: bgeu a5, a0, .LBB185_468 +; RV32-NEXT: .LBB185_277: +; RV32-NEXT: bne a1, a7, .LBB185_469 +; RV32-NEXT: .LBB185_278: +; RV32-NEXT: bge a7, a1, .LBB185_470 +; RV32-NEXT: .LBB185_279: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a4, a1, .LBB185_281 +; RV32-NEXT: .LBB185_280: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul 
a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 384(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_281: +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a5, a5, a7 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw t1, 384(a5) # 4-byte Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a5, a5, a7 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw t0, 312(a5) # 4-byte Folded Reload +; RV32-NEXT: bltu t1, a0, .LBB185_283 +; RV32-NEXT: # %bb.282: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: .LBB185_283: +; RV32-NEXT: beq a1, a4, .LBB185_285 +; RV32-NEXT: # %bb.284: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_285: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 236(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a4, a1, .LBB185_287 +; RV32-NEXT: # %bb.286: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB185_287: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a3, a1, .LBB185_289 +; RV32-NEXT: # %bb.288: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 388(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB185_289: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a7, 388(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a7, a0, .LBB185_291 +; RV32-NEXT: # %bb.290: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: .LBB185_291: +; RV32-NEXT: beq a1, a3, .LBB185_293 +; RV32-NEXT: # %bb.292: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_293: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t1, 256(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 260(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a7, 272(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 252(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 248(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a3, a1, .LBB185_471 +; RV32-NEXT: # %bb.294: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s1, a1, .LBB185_472 +; RV32-NEXT: .LBB185_295: +; RV32-NEXT: bgeu t4, a0, .LBB185_473 +; RV32-NEXT: .LBB185_296: +; RV32-NEXT: beq a1, s1, .LBB185_298 +; RV32-NEXT: .LBB185_297: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_298: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 240(a2) # 4-byte Folded Reload +; RV32-NEXT: bge s1, a1, .LBB185_474 +; RV32-NEXT: # %bb.299: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a6, a1, .LBB185_475 +; RV32-NEXT: .LBB185_300: +; RV32-NEXT: bgeu a3, a0, .LBB185_476 +; RV32-NEXT: .LBB185_301: +; RV32-NEXT: bne a1, a6, .LBB185_477 +; RV32-NEXT: .LBB185_302: +; RV32-NEXT: bge a6, a1, .LBB185_478 +; RV32-NEXT: .LBB185_303: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a4, a1, .LBB185_305 +; RV32-NEXT: .LBB185_304: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: .LBB185_305: +; RV32-NEXT: csrr a3, vlenb +; 
RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a6, 304(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 284(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a5, a0, .LBB185_307 +; RV32-NEXT: # %bb.306: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: .LBB185_307: +; RV32-NEXT: beq a1, a4, .LBB185_309 +; RV32-NEXT: # %bb.308: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_309: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 268(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a4, a1, .LBB185_479 +; RV32-NEXT: # %bb.310: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s0, a1, .LBB185_480 +; RV32-NEXT: .LBB185_311: +; RV32-NEXT: bgeu t1, a0, .LBB185_481 +; RV32-NEXT: .LBB185_312: +; RV32-NEXT: bne a1, s0, .LBB185_482 +; RV32-NEXT: .LBB185_313: +; RV32-NEXT: bge s0, a1, .LBB185_483 +; RV32-NEXT: .LBB185_314: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a5, a1, .LBB185_484 +; RV32-NEXT: .LBB185_315: +; RV32-NEXT: bgeu t5, a0, .LBB185_485 +; RV32-NEXT: .LBB185_316: +; RV32-NEXT: beq a1, a5, .LBB185_318 +; RV32-NEXT: .LBB185_317: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_318: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 276(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a5, a1, .LBB185_486 +; RV32-NEXT: # %bb.319: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a4, a1, .LBB185_487 +; RV32-NEXT: .LBB185_320: +; RV32-NEXT: bgeu a7, a0, .LBB185_488 +; RV32-NEXT: .LBB185_321: +; RV32-NEXT: beq a1, a4, .LBB185_323 +; RV32-NEXT: .LBB185_322: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_323: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 296(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 292(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a4, a1, .LBB185_325 +; RV32-NEXT: # %bb.324: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB185_325: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a4, a4, a7 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 280(a4) # 4-byte Folded Reload +; RV32-NEXT: bge a3, a1, .LBB185_489 +; RV32-NEXT: # %bb.326: +; RV32-NEXT: bgeu a4, a0, .LBB185_490 +; RV32-NEXT: .LBB185_327: +; RV32-NEXT: bne a1, a3, .LBB185_491 +; RV32-NEXT: .LBB185_328: +; RV32-NEXT: blt a3, a1, .LBB185_330 +; RV32-NEXT: .LBB185_329: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB185_330: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 288(a3) # 4-byte Folded Reload +; RV32-NEXT: blt a5, a1, .LBB185_332 +; RV32-NEXT: # %bb.331: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB185_332: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a3, a3, a7 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s0, 316(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a4, a0, .LBB185_334 +; RV32-NEXT: # %bb.333: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB185_334: +; RV32-NEXT: beq a1, a5, .LBB185_336 +; RV32-NEXT: # %bb.335: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_336: +; RV32-NEXT: csrr a2, vlenb +; 
RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 320(a2) # 4-byte Folded Reload +; RV32-NEXT: blt a5, a1, .LBB185_338 +; RV32-NEXT: # %bb.337: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB185_338: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 300(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a5, a1, .LBB185_492 +; RV32-NEXT: # %bb.339: +; RV32-NEXT: bgeu s1, a0, .LBB185_493 +; RV32-NEXT: .LBB185_340: +; RV32-NEXT: bne a1, a5, .LBB185_494 +; RV32-NEXT: .LBB185_341: +; RV32-NEXT: blt a5, a1, .LBB185_343 +; RV32-NEXT: .LBB185_342: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB185_343: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 308(a3) # 4-byte Folded Reload +; RV32-NEXT: bge a5, a1, .LBB185_495 +; RV32-NEXT: # %bb.344: +; RV32-NEXT: bgeu a6, a0, .LBB185_496 +; RV32-NEXT: .LBB185_345: +; RV32-NEXT: beq a1, a5, .LBB185_347 +; RV32-NEXT: .LBB185_346: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB185_347: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 324(a2) # 4-byte Folded Reload +; RV32-NEXT: bge a5, a1, .LBB185_497 +; RV32-NEXT: # %bb.348: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s0, a1, .LBB185_498 +; RV32-NEXT: .LBB185_349: +; RV32-NEXT: bgeu t0, a0, .LBB185_499 +; RV32-NEXT: .LBB185_350: +; RV32-NEXT: bne a1, s0, .LBB185_500 +; RV32-NEXT: .LBB185_351: +; RV32-NEXT: bge s0, a1, .LBB185_501 +; RV32-NEXT: .LBB185_352: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a3, a1, .LBB185_502 +; RV32-NEXT: .LBB185_353: +; RV32-NEXT: bgeu a4, a0, .LBB185_503 +; RV32-NEXT: .LBB185_354: +; RV32-NEXT: bne a1, a3, .LBB185_504 +; RV32-NEXT: .LBB185_355: +; RV32-NEXT: blt a3, a1, .LBB185_357 +; RV32-NEXT: .LBB185_356: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB185_357: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 428(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 432(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 436(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 440(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 444(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 448(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 452(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 456(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 460(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 464(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 468(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 472(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 476(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 480 +; RV32-NEXT: ret +; RV32-NEXT: .LBB185_358: +; RV32-NEXT: mv s4, t6 +; RV32-NEXT: bgeu t6, a0, .LBB185_359 +; RV32-NEXT: j .LBB185_2 +; RV32-NEXT: .LBB185_359: +; RV32-NEXT: mv a0, t6 +; RV32-NEXT: bne a1, a3, .LBB185_360 +; RV32-NEXT: j .LBB185_3 +; RV32-NEXT: .LBB185_360: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: bge a3, a1, .LBB185_361 +; RV32-NEXT: j .LBB185_4 +; RV32-NEXT: .LBB185_361: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: bge a4, a1, .LBB185_362 +; RV32-NEXT: j .LBB185_5 +; RV32-NEXT: .LBB185_362: +; RV32-NEXT: mv a3, t4 +; RV32-NEXT: bgeu t4, a0, .LBB185_363 +; RV32-NEXT: j .LBB185_6 +; RV32-NEXT: .LBB185_363: +; RV32-NEXT: mv a0, 
t4 +; RV32-NEXT: beq a1, a4, .LBB185_505 +; RV32-NEXT: j .LBB185_7 +; RV32-NEXT: .LBB185_505: +; RV32-NEXT: j .LBB185_8 +; RV32-NEXT: .LBB185_364: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bge s10, a1, .LBB185_365 +; RV32-NEXT: j .LBB185_10 +; RV32-NEXT: .LBB185_365: +; RV32-NEXT: mv a4, a7 +; RV32-NEXT: bgeu a7, a0, .LBB185_366 +; RV32-NEXT: j .LBB185_11 +; RV32-NEXT: .LBB185_366: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, s10, .LBB185_367 +; RV32-NEXT: j .LBB185_12 +; RV32-NEXT: .LBB185_367: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge s10, a1, .LBB185_368 +; RV32-NEXT: j .LBB185_13 +; RV32-NEXT: .LBB185_368: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt s3, a1, .LBB185_506 +; RV32-NEXT: j .LBB185_14 +; RV32-NEXT: .LBB185_506: +; RV32-NEXT: j .LBB185_15 +; RV32-NEXT: .LBB185_369: +; RV32-NEXT: mv a0, s6 +; RV32-NEXT: bne a1, s3, .LBB185_370 +; RV32-NEXT: j .LBB185_17 +; RV32-NEXT: .LBB185_370: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge s3, a1, .LBB185_371 +; RV32-NEXT: j .LBB185_18 +; RV32-NEXT: .LBB185_371: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bge a5, a1, .LBB185_372 +; RV32-NEXT: j .LBB185_19 +; RV32-NEXT: .LBB185_372: +; RV32-NEXT: mv a4, t5 +; RV32-NEXT: bgeu t5, a0, .LBB185_373 +; RV32-NEXT: j .LBB185_20 +; RV32-NEXT: .LBB185_373: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB185_374 +; RV32-NEXT: j .LBB185_21 +; RV32-NEXT: .LBB185_374: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge a5, a1, .LBB185_375 +; RV32-NEXT: j .LBB185_22 +; RV32-NEXT: .LBB185_375: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bge t1, a1, .LBB185_376 +; RV32-NEXT: j .LBB185_23 +; RV32-NEXT: .LBB185_376: +; RV32-NEXT: mv a4, s5 +; RV32-NEXT: bgeu s5, a0, .LBB185_377 +; RV32-NEXT: j .LBB185_24 +; RV32-NEXT: .LBB185_377: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t1, .LBB185_507 +; RV32-NEXT: j .LBB185_25 +; RV32-NEXT: .LBB185_507: +; RV32-NEXT: j .LBB185_26 +; RV32-NEXT: .LBB185_378: +; RV32-NEXT: mv a0, ra +; RV32-NEXT: bne a1, s1, .LBB185_379 +; RV32-NEXT: j .LBB185_32 +; RV32-NEXT: .LBB185_379: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bge s1, a1, .LBB185_380 +; RV32-NEXT: j .LBB185_33 +; RV32-NEXT: .LBB185_380: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt s11, a1, .LBB185_508 +; RV32-NEXT: j .LBB185_34 +; RV32-NEXT: .LBB185_508: +; RV32-NEXT: j .LBB185_35 +; RV32-NEXT: .LBB185_381: +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: bne a1, t3, .LBB185_382 +; RV32-NEXT: j .LBB185_45 +; RV32-NEXT: .LBB185_382: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: blt t3, a1, .LBB185_509 +; RV32-NEXT: j .LBB185_46 +; RV32-NEXT: .LBB185_509: +; RV32-NEXT: j .LBB185_47 +; RV32-NEXT: .LBB185_383: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, a6, .LBB185_384 +; RV32-NEXT: j .LBB185_59 +; RV32-NEXT: .LBB185_384: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: bge a6, a1, .LBB185_385 +; RV32-NEXT: j .LBB185_60 +; RV32-NEXT: .LBB185_385: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: blt t2, a1, .LBB185_510 +; RV32-NEXT: j .LBB185_61 +; RV32-NEXT: .LBB185_510: +; RV32-NEXT: j .LBB185_62 +; RV32-NEXT: .LBB185_386: +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: bne a1, t2, .LBB185_387 +; RV32-NEXT: j .LBB185_64 +; RV32-NEXT: .LBB185_387: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: blt t2, a1, .LBB185_511 +; RV32-NEXT: j .LBB185_65 +; RV32-NEXT: .LBB185_511: +; RV32-NEXT: j .LBB185_66 +; RV32-NEXT: .LBB185_388: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bgeu a5, a0, .LBB185_389 +; RV32-NEXT: j .LBB185_68 +; RV32-NEXT: .LBB185_389: +; RV32-NEXT: mv a0, a5 +; 
RV32-NEXT: bne a1, s1, .LBB185_390 +; RV32-NEXT: j .LBB185_69 +; RV32-NEXT: .LBB185_390: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge s1, a1, .LBB185_391 +; RV32-NEXT: j .LBB185_70 +; RV32-NEXT: .LBB185_391: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: blt s0, a1, .LBB185_512 +; RV32-NEXT: j .LBB185_71 +; RV32-NEXT: .LBB185_512: +; RV32-NEXT: j .LBB185_72 +; RV32-NEXT: .LBB185_392: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, s0, .LBB185_393 +; RV32-NEXT: j .LBB185_74 +; RV32-NEXT: .LBB185_393: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bge s0, a1, .LBB185_394 +; RV32-NEXT: j .LBB185_75 +; RV32-NEXT: .LBB185_394: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: blt s1, a1, .LBB185_513 +; RV32-NEXT: j .LBB185_76 +; RV32-NEXT: .LBB185_513: +; RV32-NEXT: j .LBB185_77 +; RV32-NEXT: .LBB185_395: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beq a1, s1, .LBB185_79 +; RV32-NEXT: .LBB185_396: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: blt s1, a1, .LBB185_80 +; RV32-NEXT: .LBB185_397: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge t5, a1, .LBB185_81 +; RV32-NEXT: j .LBB185_82 +; RV32-NEXT: .LBB185_398: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t5, .LBB185_84 +; RV32-NEXT: .LBB185_399: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge t5, a1, .LBB185_85 +; RV32-NEXT: j .LBB185_86 +; RV32-NEXT: .LBB185_400: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a5, a1, .LBB185_94 +; RV32-NEXT: .LBB185_401: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a3, a0, .LBB185_95 +; RV32-NEXT: .LBB185_402: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: bne a1, a5, .LBB185_96 +; RV32-NEXT: j .LBB185_97 +; RV32-NEXT: .LBB185_403: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a4, a1, .LBB185_99 +; RV32-NEXT: .LBB185_404: +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: bltu a6, a0, .LBB185_100 +; RV32-NEXT: .LBB185_405: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, a4, .LBB185_101 +; RV32-NEXT: j .LBB185_102 +; RV32-NEXT: .LBB185_406: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s2, a1, .LBB185_104 +; RV32-NEXT: .LBB185_407: +; RV32-NEXT: mv a2, s7 +; RV32-NEXT: bltu s7, a0, .LBB185_105 +; RV32-NEXT: .LBB185_408: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s2, .LBB185_106 +; RV32-NEXT: .LBB185_409: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s2, a1, .LBB185_107 +; RV32-NEXT: .LBB185_410: +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s6, a1, .LBB185_108 +; RV32-NEXT: j .LBB185_109 +; RV32-NEXT: .LBB185_411: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s6, .LBB185_111 +; RV32-NEXT: .LBB185_412: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge s6, a1, .LBB185_112 +; RV32-NEXT: j .LBB185_113 +; RV32-NEXT: .LBB185_413: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s3, a1, .LBB185_121 +; RV32-NEXT: .LBB185_414: +; RV32-NEXT: mv a2, s11 +; RV32-NEXT: bltu s11, a0, .LBB185_122 +; RV32-NEXT: .LBB185_415: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s3, .LBB185_123 +; RV32-NEXT: .LBB185_416: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s3, a1, .LBB185_124 +; RV32-NEXT: .LBB185_417: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s10, a1, .LBB185_125 +; RV32-NEXT: j .LBB185_126 +; RV32-NEXT: .LBB185_418: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s10, .LBB185_128 +; RV32-NEXT: .LBB185_419: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s10, a1, .LBB185_129 +; RV32-NEXT: .LBB185_420: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge t1, a1, .LBB185_130 +; RV32-NEXT: j .LBB185_131 +; RV32-NEXT: .LBB185_421: 
+; RV32-NEXT: mv a0, s10 +; RV32-NEXT: beq a1, t1, .LBB185_133 +; RV32-NEXT: .LBB185_422: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt t1, a1, .LBB185_134 +; RV32-NEXT: .LBB185_423: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s5, a1, .LBB185_135 +; RV32-NEXT: j .LBB185_136 +; RV32-NEXT: .LBB185_424: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s5, .LBB185_138 +; RV32-NEXT: .LBB185_425: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv t1, t2 +; RV32-NEXT: blt s5, a1, .LBB185_139 +; RV32-NEXT: .LBB185_426: +; RV32-NEXT: mv a1, s5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge t6, a1, .LBB185_140 +; RV32-NEXT: j .LBB185_141 +; RV32-NEXT: .LBB185_427: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t6, .LBB185_143 +; RV32-NEXT: .LBB185_428: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge t6, a1, .LBB185_144 +; RV32-NEXT: j .LBB185_145 +; RV32-NEXT: .LBB185_429: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s8, a1, .LBB185_153 +; RV32-NEXT: .LBB185_430: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a3, a0, .LBB185_154 +; RV32-NEXT: .LBB185_431: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, s8, .LBB185_155 +; RV32-NEXT: .LBB185_432: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s8, a1, .LBB185_156 +; RV32-NEXT: .LBB185_433: +; RV32-NEXT: mv a1, s8 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge t3, a1, .LBB185_157 +; RV32-NEXT: j .LBB185_158 +; RV32-NEXT: .LBB185_434: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t3, .LBB185_160 +; RV32-NEXT: .LBB185_435: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge t3, a1, .LBB185_161 +; RV32-NEXT: j .LBB185_162 +; RV32-NEXT: .LBB185_436: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a7, a1, .LBB185_170 +; RV32-NEXT: .LBB185_437: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a5, a0, .LBB185_171 +; RV32-NEXT: .LBB185_438: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bne a1, a7, .LBB185_172 +; RV32-NEXT: j .LBB185_173 +; RV32-NEXT: .LBB185_439: +; RV32-NEXT: mv a0, s8 +; RV32-NEXT: beq a1, s1, .LBB185_179 +; RV32-NEXT: .LBB185_440: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s1, a1, .LBB185_180 +; RV32-NEXT: .LBB185_441: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge s4, a1, .LBB185_181 +; RV32-NEXT: j .LBB185_182 +; RV32-NEXT: .LBB185_442: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: beq a1, s4, .LBB185_184 +; RV32-NEXT: .LBB185_443: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge s4, a1, .LBB185_185 +; RV32-NEXT: j .LBB185_186 +; RV32-NEXT: .LBB185_444: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt t0, a1, .LBB185_194 +; RV32-NEXT: .LBB185_445: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a4, a0, .LBB185_195 +; RV32-NEXT: .LBB185_446: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, t0, .LBB185_196 +; RV32-NEXT: j .LBB185_197 +; RV32-NEXT: .LBB185_447: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: beq a1, a5, .LBB185_211 +; RV32-NEXT: .LBB185_448: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a5, a1, .LBB185_212 +; RV32-NEXT: j .LBB185_213 +; RV32-NEXT: .LBB185_449: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt ra, a1, .LBB185_221 +; RV32-NEXT: .LBB185_450: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a4, a0, .LBB185_222 +; RV32-NEXT: .LBB185_451: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, ra, .LBB185_223 +; RV32-NEXT: j .LBB185_224 +; RV32-NEXT: .LBB185_452: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, a4, .LBB185_230 +; RV32-NEXT: .LBB185_453: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a4, a1, .LBB185_231 +; RV32-NEXT: j .LBB185_232 +; RV32-NEXT: .LBB185_454: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; 
RV32-NEXT: blt a3, a1, .LBB185_240 +; RV32-NEXT: .LBB185_455: +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: bltu s0, a0, .LBB185_241 +; RV32-NEXT: .LBB185_456: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: beq a1, a3, .LBB185_242 +; RV32-NEXT: .LBB185_457: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a3, a1, .LBB185_243 +; RV32-NEXT: j .LBB185_244 +; RV32-NEXT: .LBB185_458: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, a3, .LBB185_248 +; RV32-NEXT: .LBB185_459: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a3, a1, .LBB185_249 +; RV32-NEXT: j .LBB185_250 +; RV32-NEXT: .LBB185_460: +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: bltu t0, a0, .LBB185_260 +; RV32-NEXT: .LBB185_461: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, s1, .LBB185_261 +; RV32-NEXT: .LBB185_462: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s1, a1, .LBB185_262 +; RV32-NEXT: .LBB185_463: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge t1, a1, .LBB185_263 +; RV32-NEXT: j .LBB185_264 +; RV32-NEXT: .LBB185_464: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, t1, .LBB185_266 +; RV32-NEXT: .LBB185_465: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge t1, a1, .LBB185_267 +; RV32-NEXT: j .LBB185_268 +; RV32-NEXT: .LBB185_466: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a7, a1, .LBB185_276 +; RV32-NEXT: .LBB185_467: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a5, a0, .LBB185_277 +; RV32-NEXT: .LBB185_468: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: beq a1, a7, .LBB185_278 +; RV32-NEXT: .LBB185_469: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a7, a1, .LBB185_279 +; RV32-NEXT: .LBB185_470: +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a4, a1, .LBB185_280 +; RV32-NEXT: j .LBB185_281 +; RV32-NEXT: .LBB185_471: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s1, a1, .LBB185_295 +; RV32-NEXT: .LBB185_472: +; RV32-NEXT: mv a2, t4 +; RV32-NEXT: bltu t4, a0, .LBB185_296 +; RV32-NEXT: .LBB185_473: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: bne a1, s1, .LBB185_297 +; RV32-NEXT: j .LBB185_298 +; RV32-NEXT: .LBB185_474: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a6, a1, .LBB185_300 +; RV32-NEXT: .LBB185_475: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a3, a0, .LBB185_301 +; RV32-NEXT: .LBB185_476: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, a6, .LBB185_302 +; RV32-NEXT: .LBB185_477: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt a6, a1, .LBB185_303 +; RV32-NEXT: .LBB185_478: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bge a4, a1, .LBB185_304 +; RV32-NEXT: j .LBB185_305 +; RV32-NEXT: .LBB185_479: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s0, a1, .LBB185_311 +; RV32-NEXT: .LBB185_480: +; RV32-NEXT: mv a2, t1 +; RV32-NEXT: bltu t1, a0, .LBB185_312 +; RV32-NEXT: .LBB185_481: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s0, .LBB185_313 +; RV32-NEXT: .LBB185_482: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s0, a1, .LBB185_314 +; RV32-NEXT: .LBB185_483: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a5, a1, .LBB185_315 +; RV32-NEXT: .LBB185_484: +; RV32-NEXT: mv a2, t5 +; RV32-NEXT: bltu t5, a0, .LBB185_316 +; RV32-NEXT: .LBB185_485: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB185_317 +; RV32-NEXT: j .LBB185_318 +; RV32-NEXT: .LBB185_486: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a4, a1, .LBB185_320 +; RV32-NEXT: .LBB185_487: +; RV32-NEXT: mv a2, a7 +; RV32-NEXT: bltu a7, a0, .LBB185_321 +; RV32-NEXT: .LBB185_488: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, a4, .LBB185_322 +; RV32-NEXT: j .LBB185_323 +; RV32-NEXT: 
.LBB185_489: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a4, a0, .LBB185_327 +; RV32-NEXT: .LBB185_490: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: beq a1, a3, .LBB185_328 +; RV32-NEXT: .LBB185_491: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a3, a1, .LBB185_329 +; RV32-NEXT: j .LBB185_330 +; RV32-NEXT: .LBB185_492: +; RV32-NEXT: mv a2, s1 +; RV32-NEXT: bltu s1, a0, .LBB185_340 +; RV32-NEXT: .LBB185_493: +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: beq a1, a5, .LBB185_341 +; RV32-NEXT: .LBB185_494: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a5, a1, .LBB185_342 +; RV32-NEXT: j .LBB185_343 +; RV32-NEXT: .LBB185_495: +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: bltu a6, a0, .LBB185_345 +; RV32-NEXT: .LBB185_496: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, a5, .LBB185_346 +; RV32-NEXT: j .LBB185_347 +; RV32-NEXT: .LBB185_497: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt s0, a1, .LBB185_349 +; RV32-NEXT: .LBB185_498: +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: bltu t0, a0, .LBB185_350 +; RV32-NEXT: .LBB185_499: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, s0, .LBB185_351 +; RV32-NEXT: .LBB185_500: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: blt s0, a1, .LBB185_352 +; RV32-NEXT: .LBB185_501: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: blt a3, a1, .LBB185_353 +; RV32-NEXT: .LBB185_502: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a4, a0, .LBB185_354 +; RV32-NEXT: .LBB185_503: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: beq a1, a3, .LBB185_355 +; RV32-NEXT: .LBB185_504: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bge a3, a1, .LBB185_356 +; RV32-NEXT: j .LBB185_357 +; +; RV64-LABEL: vreduce_smax_v64i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vmax.vv v16, v24, v16 +; RV64-NEXT: vmax.vv v8, v8, v0 +; RV64-NEXT: vmax.vv v8, v8, v16 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmax.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i64>, <64 x i64>* %x + %red = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> %v) + ret i64 %red +} + +declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>) + +define i8 @vreduce_umin_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>) + +define i8 @vreduce_umin_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>) + +define i8 @vreduce_umin_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) + +define i8 @vreduce_umin_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) + +define i8 @vreduce_umin_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>) + +define i8 @vreduce_umin_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>) + +define i8 @vreduce_umin_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>) + +define i8 @vreduce_umin_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_umin_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umin.v256i8(<256 x i8>) + +define i8 @vreduce_umin_v256i8(<256 x i8>* %x) { +; 
CHECK-LABEL: vreduce_umin_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.umin.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.umin.v1i16(<1 x i16>) + +define i16 @vreduce_umin_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>) + +define i16 @vreduce_umin_v2i16(<2 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) + +define i16 @vreduce_umin_v4i16(<4 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) + +define i16 @vreduce_umin_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) + +define i16 @vreduce_umin_v16i16(<16 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>) + +define i16 @vreduce_umin_v32i16(<32 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v32i16: +; CHECK: # %bb.0: +; 
CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>) + +define i16 @vreduce_umin_v64i16(<64 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umin.v128i16(<128 x i16>) + +define i16 @vreduce_umin_v128i16(<128 x i16>* %x) { +; CHECK-LABEL: vreduce_umin_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i16>, <128 x i16>* %x + %red = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.umin.v1i32(<1 x i32>) + +define i32 @vreduce_umin_v1i32(<1 x i32>* %x) { +; CHECK-LABEL: vreduce_umin_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i32>, <1 x i32>* %x + %red = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) + +define i32 @vreduce_umin_v2i32(<2 x i32>* %x) { +; CHECK-LABEL: vreduce_umin_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i32>, <2 x i32>* %x + %red = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) + +define i32 @vreduce_umin_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: vreduce_umin_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, -1 +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vredminu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i32>, <4 x i32>* %x + %red = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) + +define i32 
@vreduce_umin_v8i32(<8 x i32>* %x) { +; CHECK-LABEL: vreduce_umin_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vredminu.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i32>, <8 x i32>* %x + %red = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>) + +define i32 @vreduce_umin_v16i32(<16 x i32>* %x) { +; CHECK-LABEL: vreduce_umin_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vredminu.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i32>, <16 x i32>* %x + %red = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>) + +define i32 @vreduce_umin_v32i32(<32 x i32>* %x) { +; CHECK-LABEL: vreduce_umin_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i32>, <32 x i32>* %x + %red = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umin.v64i32(<64 x i32>) + +define i32 @vreduce_umin_v64i32(<64 x i32>* %x) { +; CHECK-LABEL: vreduce_umin_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, -1 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredminu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i32>, <64 x i32>* %x + %red = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>) + +define i64 @vreduce_umin_v1i64(<1 x i64>* %x) { +; RV32-LABEL: vreduce_umin_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umin_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <1 x i64>, <1 x i64>* %x + %red = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) + +define i64 @vreduce_umin_v2i64(<2 x i64>* %x) { +; RV32-LABEL: vreduce_umin_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; 
RV32-NEXT: vmv.v.i v26, -1 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vredminu.vs v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umin_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v26, -1 +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV64-NEXT: vredminu.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <2 x i64>, <2 x i64>* %x + %red = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) + +define i64 @vreduce_umin_v4i64(<4 x i64>* %x) { +; RV32-LABEL: vreduce_umin_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v26, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vredminu.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umin_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v26, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vredminu.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i64>, <4 x i64>* %x + %red = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>) + +define i64 @vreduce_umin_v8i64(<8 x i64>* %x) { +; RV32-LABEL: vreduce_umin_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vredminu.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umin_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vredminu.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i64>, <8 x i64>* %x + %red = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>) + +define i64 @vreduce_umin_v16i64(<16 x i64>* %x) { +; RV32-LABEL: vreduce_umin_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, -1 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vredminu.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, 
e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umin_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredminu.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i64>, <16 x i64>* %x + %red = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.v32i64(<32 x i64>) + +define i64 @vreduce_umin_v32i64(<32 x i64>* %x) { +; RV32-LABEL: vreduce_umin_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vminu.vv v8, v8, v16 +; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vminu.vv v28, v8, v16 +; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v8, v28, 4 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vminu.vv v26, v28, v8 +; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 2 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vminu.vv v25, v26, v28 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslidedown.vi v26, v25, 1 +; RV32-NEXT: vminu.vv v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umin_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vminu.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredminu.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umin.v64i64(<64 x i64>) + +define i64 @vreduce_umin_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_umin_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -480 +; RV32-NEXT: sw ra, 476(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 472(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 468(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 464(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 460(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 456(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 452(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 448(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 444(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 440(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 436(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 432(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 428(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: addi a2, a0, 384 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: 
csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 320(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 324(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 312(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 316(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 304(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 308(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 296(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 300(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 288(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 292(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 280(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 284(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 272(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 276(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 264(a3) # 4-byte Folded Spill +; RV32-NEXT: 
vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 268(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 256(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 260(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 248(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 252(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 240(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 244(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 232(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 236(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 388(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 228(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; 
RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 384(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 224(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 216(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 220(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 212(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 424(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 256 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, 
zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, 
v8, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 380(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 208(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 200(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 204(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 196(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 420(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 376(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # 
Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 192(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 184(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 188(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 180(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 416(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 372(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 176(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 168(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 172(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; 
RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 164(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 412(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 368(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 160(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 364(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 156(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 148(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 152(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 144(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 408(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # 
Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 360(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 140(a3) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 356(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 136(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 128(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 132(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 128 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded 
Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, 
sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 124(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 404(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 352(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 120(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 112(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 116(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s0, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 400(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; 
RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 348(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 108(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 344(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 104(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 340(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 100(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 336(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 84(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 92(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: 
csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 96(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s9, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 396(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 332(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 80(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 72(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 76(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 64(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 68(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 56(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; 
RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 60(a3) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s7, v0 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 392(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 328(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 48(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli a0, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 224 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 216 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 208 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 200 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 192 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 184 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 176 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 168 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 160 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 152 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 144 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 136 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: 
vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 120 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 112 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 104 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 96 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 88 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 80 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 56 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 224 +; 
RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 40(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 216 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 44(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 208 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 32(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 200 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 36(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 192 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 28(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 184 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 12(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 176 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 24(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 168 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 160 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 20(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 152 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a6, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 144 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a0, 16(a1) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 136 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 120 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 112 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 104 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s11, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 96 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s ra, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 88 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 80 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 56 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s6, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, zero, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s10, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v0 +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a4, v0 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: 
vmv.x.s a3, v24 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t6, zero, 24 +; RV32-NEXT: mul a2, a2, t6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v8 +; RV32-NEXT: mv s4, a0 +; RV32-NEXT: bltu a1, a3, .LBB216_1 +; RV32-NEXT: j .LBB216_358 +; RV32-NEXT: .LBB216_1: +; RV32-NEXT: bltu a0, t6, .LBB216_2 +; RV32-NEXT: j .LBB216_359 +; RV32-NEXT: .LBB216_2: +; RV32-NEXT: beq a1, a3, .LBB216_3 +; RV32-NEXT: j .LBB216_360 +; RV32-NEXT: .LBB216_3: +; RV32-NEXT: bltu a1, a3, .LBB216_4 +; RV32-NEXT: j .LBB216_361 +; RV32-NEXT: .LBB216_4: +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: bltu a1, a4, .LBB216_5 +; RV32-NEXT: j .LBB216_362 +; RV32-NEXT: .LBB216_5: +; RV32-NEXT: bltu a0, t4, .LBB216_6 +; RV32-NEXT: j .LBB216_363 +; RV32-NEXT: .LBB216_6: +; RV32-NEXT: beq a1, a4, .LBB216_8 +; RV32-NEXT: .LBB216_7: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB216_8: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw s0, 88(a2) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw s9, 52(a2) # 4-byte Folded Spill +; RV32-NEXT: mv t4, s7 +; RV32-NEXT: bltu a1, a4, .LBB216_9 +; RV32-NEXT: j .LBB216_364 +; RV32-NEXT: .LBB216_9: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, s10, .LBB216_10 +; RV32-NEXT: j .LBB216_365 +; RV32-NEXT: .LBB216_10: +; RV32-NEXT: bltu a0, a7, .LBB216_11 +; RV32-NEXT: j .LBB216_366 +; RV32-NEXT: .LBB216_11: +; RV32-NEXT: beq a1, s10, .LBB216_12 +; RV32-NEXT: j .LBB216_367 +; RV32-NEXT: .LBB216_12: +; RV32-NEXT: bltu a1, s10, .LBB216_13 +; RV32-NEXT: j .LBB216_368 +; RV32-NEXT: .LBB216_13: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, s3, .LBB216_15 +; RV32-NEXT: .LBB216_14: +; RV32-NEXT: mv a4, s6 +; RV32-NEXT: .LBB216_15: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s10, 84(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s6, .LBB216_16 +; RV32-NEXT: j .LBB216_369 +; RV32-NEXT: .LBB216_16: +; RV32-NEXT: beq a1, s3, .LBB216_17 +; RV32-NEXT: j .LBB216_370 +; RV32-NEXT: .LBB216_17: +; RV32-NEXT: bltu a1, s3, .LBB216_18 +; RV32-NEXT: j .LBB216_371 +; RV32-NEXT: .LBB216_18: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, a5, .LBB216_19 +; RV32-NEXT: j .LBB216_372 +; RV32-NEXT: .LBB216_19: +; RV32-NEXT: bltu a0, t5, .LBB216_20 +; RV32-NEXT: j .LBB216_373 +; RV32-NEXT: .LBB216_20: +; RV32-NEXT: beq a1, a5, .LBB216_21 +; RV32-NEXT: j .LBB216_374 +; RV32-NEXT: .LBB216_21: +; RV32-NEXT: bltu a1, a5, .LBB216_22 +; RV32-NEXT: j .LBB216_375 +; RV32-NEXT: .LBB216_22: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, t1, .LBB216_23 +; RV32-NEXT: j .LBB216_376 +; RV32-NEXT: .LBB216_23: +; RV32-NEXT: bltu a0, s5, .LBB216_24 +; RV32-NEXT: j .LBB216_377 +; RV32-NEXT: .LBB216_24: +; RV32-NEXT: beq a1, t1, .LBB216_26 +; RV32-NEXT: .LBB216_25: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB216_26: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s5, 104(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, t1, .LBB216_28 +; RV32-NEXT: # %bb.27: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: .LBB216_28: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, s1, .LBB216_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: mv a4, ra +; RV32-NEXT: .LBB216_30: +; RV32-NEXT: csrr a2, vlenb +; 
RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t1, 100(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, ra, .LBB216_31 +; RV32-NEXT: j .LBB216_378 +; RV32-NEXT: .LBB216_31: +; RV32-NEXT: beq a1, s1, .LBB216_32 +; RV32-NEXT: j .LBB216_379 +; RV32-NEXT: .LBB216_32: +; RV32-NEXT: bltu a1, s1, .LBB216_33 +; RV32-NEXT: j .LBB216_380 +; RV32-NEXT: .LBB216_33: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, s11, .LBB216_35 +; RV32-NEXT: .LBB216_34: +; RV32-NEXT: mv a4, s8 +; RV32-NEXT: .LBB216_35: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t5, 48(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s8, .LBB216_37 +; RV32-NEXT: # %bb.36: +; RV32-NEXT: mv a0, s8 +; RV32-NEXT: .LBB216_37: +; RV32-NEXT: beq a1, s11, .LBB216_39 +; RV32-NEXT: # %bb.38: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB216_39: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a7, 132(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, s11, .LBB216_41 +; RV32-NEXT: # %bb.40: +; RV32-NEXT: mv a1, s11 +; RV32-NEXT: .LBB216_41: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, t3, .LBB216_43 +; RV32-NEXT: # %bb.42: +; RV32-NEXT: mv a4, s2 +; RV32-NEXT: .LBB216_43: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s3, 96(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s11, 92(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s2, .LBB216_44 +; RV32-NEXT: j .LBB216_381 +; RV32-NEXT: .LBB216_44: +; RV32-NEXT: beq a1, t3, .LBB216_45 +; RV32-NEXT: j .LBB216_382 +; RV32-NEXT: .LBB216_45: +; RV32-NEXT: bltu a1, t3, .LBB216_47 +; RV32-NEXT: .LBB216_46: +; RV32-NEXT: mv a1, t3 +; RV32-NEXT: .LBB216_47: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s0, 16(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, t0, .LBB216_49 +; RV32-NEXT: # %bb.48: +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: .LBB216_49: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s1, 24(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 20(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s0, .LBB216_51 +; RV32-NEXT: # %bb.50: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: .LBB216_51: +; RV32-NEXT: beq a1, t0, .LBB216_53 +; RV32-NEXT: # %bb.52: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_53: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s8, 116(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, t0, .LBB216_55 +; RV32-NEXT: # %bb.54: +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: .LBB216_55: +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: bltu a1, a6, .LBB216_57 +; RV32-NEXT: # %bb.56: +; RV32-NEXT: mv s0, a4 +; RV32-NEXT: .LBB216_57: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t0, 152(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul 
a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s6, 80(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 28(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a4, .LBB216_58 +; RV32-NEXT: j .LBB216_383 +; RV32-NEXT: .LBB216_58: +; RV32-NEXT: beq a1, a6, .LBB216_59 +; RV32-NEXT: j .LBB216_384 +; RV32-NEXT: .LBB216_59: +; RV32-NEXT: bltu a1, a6, .LBB216_60 +; RV32-NEXT: j .LBB216_385 +; RV32-NEXT: .LBB216_60: +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, t2, .LBB216_62 +; RV32-NEXT: .LBB216_61: +; RV32-NEXT: mv a4, s1 +; RV32-NEXT: .LBB216_62: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 36(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a6, 32(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, s1, .LBB216_63 +; RV32-NEXT: j .LBB216_386 +; RV32-NEXT: .LBB216_63: +; RV32-NEXT: beq a1, t2, .LBB216_64 +; RV32-NEXT: j .LBB216_387 +; RV32-NEXT: .LBB216_64: +; RV32-NEXT: bltu a1, t2, .LBB216_66 +; RV32-NEXT: .LBB216_65: +; RV32-NEXT: mv a1, t2 +; RV32-NEXT: .LBB216_66: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t6, 108(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s1, 12(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, s1, .LBB216_67 +; RV32-NEXT: j .LBB216_388 +; RV32-NEXT: .LBB216_67: +; RV32-NEXT: bltu a0, a5, .LBB216_68 +; RV32-NEXT: j .LBB216_389 +; RV32-NEXT: .LBB216_68: +; RV32-NEXT: beq a1, s1, .LBB216_69 +; RV32-NEXT: j .LBB216_390 +; RV32-NEXT: .LBB216_69: +; RV32-NEXT: bltu a1, s1, .LBB216_70 +; RV32-NEXT: j .LBB216_391 +; RV32-NEXT: .LBB216_70: +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: bltu a1, s0, .LBB216_72 +; RV32-NEXT: .LBB216_71: +; RV32-NEXT: mv a5, a6 +; RV32-NEXT: .LBB216_72: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t3, 120(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 44(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 40(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a6, .LBB216_73 +; RV32-NEXT: j .LBB216_392 +; RV32-NEXT: .LBB216_73: +; RV32-NEXT: beq a1, s0, .LBB216_74 +; RV32-NEXT: j .LBB216_393 +; RV32-NEXT: .LBB216_74: +; RV32-NEXT: bltu a1, s0, .LBB216_75 +; RV32-NEXT: j .LBB216_394 +; RV32-NEXT: .LBB216_75: +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: bltu a1, s1, .LBB216_77 +; RV32-NEXT: .LBB216_76: +; RV32-NEXT: mv s0, a2 +; RV32-NEXT: .LBB216_77: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw ra, 172(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s2, 76(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; 
RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s7, 72(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 68(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a6, 64(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 60(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 56(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, a2, .LBB216_395 +; RV32-NEXT: # %bb.78: +; RV32-NEXT: bne a1, s1, .LBB216_396 +; RV32-NEXT: .LBB216_79: +; RV32-NEXT: bgeu a1, s1, .LBB216_397 +; RV32-NEXT: .LBB216_80: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, t5, .LBB216_82 +; RV32-NEXT: .LBB216_81: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a2, a2, t2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 328(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_82: +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: addi s0, zero, 232 +; RV32-NEXT: mul t2, t2, s0 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: lw t2, 208(t2) # 4-byte Folded Reload +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: addi s1, zero, 232 +; RV32-NEXT: mul s0, s0, s1 +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: lw s4, 140(s0) # 4-byte Folded Reload +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: addi s1, zero, 232 +; RV32-NEXT: mul s0, s0, s1 +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: lw s9, 328(s0) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s9, .LBB216_398 +; RV32-NEXT: # %bb.83: +; RV32-NEXT: bne a1, t5, .LBB216_399 +; RV32-NEXT: .LBB216_84: +; RV32-NEXT: bltu a1, t5, .LBB216_86 +; RV32-NEXT: .LBB216_85: +; RV32-NEXT: mv a1, t5 +; RV32-NEXT: .LBB216_86: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: addi s0, zero, 232 +; RV32-NEXT: mul t5, t5, s0 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: lw s1, 392(t5) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, s1, .LBB216_88 +; RV32-NEXT: # %bb.87: +; RV32-NEXT: mv a2, t4 +; RV32-NEXT: .LBB216_88: +; RV32-NEXT: bltu a0, t4, .LBB216_90 +; RV32-NEXT: # %bb.89: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: .LBB216_90: +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: addi t5, zero, 232 +; RV32-NEXT: mul t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: lw t4, 392(t4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB216_92 +; RV32-NEXT: # %bb.91: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_92: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t4, zero, 232 +; RV32-NEXT: mul a2, a2, t4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 392(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_400 +; RV32-NEXT: # %bb.93: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a5, .LBB216_401 +; RV32-NEXT: .LBB216_94: +; RV32-NEXT: bgeu a0, a3, .LBB216_402 +; RV32-NEXT: .LBB216_95: +; RV32-NEXT: beq a1, a5, .LBB216_97 +; RV32-NEXT: .LBB216_96: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_97: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 52(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a5, .LBB216_403 +; RV32-NEXT: # %bb.98: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a4, .LBB216_404 +; RV32-NEXT: .LBB216_99: +; RV32-NEXT: bgeu a0, a6, 
.LBB216_405 +; RV32-NEXT: .LBB216_100: +; RV32-NEXT: beq a1, a4, .LBB216_102 +; RV32-NEXT: .LBB216_101: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_102: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 136(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a4, .LBB216_406 +; RV32-NEXT: # %bb.103: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s2, .LBB216_407 +; RV32-NEXT: .LBB216_104: +; RV32-NEXT: bgeu a0, s7, .LBB216_408 +; RV32-NEXT: .LBB216_105: +; RV32-NEXT: bne a1, s2, .LBB216_409 +; RV32-NEXT: .LBB216_106: +; RV32-NEXT: bgeu a1, s2, .LBB216_410 +; RV32-NEXT: .LBB216_107: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s6, .LBB216_109 +; RV32-NEXT: .LBB216_108: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 332(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_109: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s7, 332(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s7, .LBB216_411 +; RV32-NEXT: # %bb.110: +; RV32-NEXT: bne a1, s6, .LBB216_412 +; RV32-NEXT: .LBB216_111: +; RV32-NEXT: bltu a1, s6, .LBB216_113 +; RV32-NEXT: .LBB216_112: +; RV32-NEXT: mv a1, s6 +; RV32-NEXT: .LBB216_113: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a5, 396(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a5, .LBB216_115 +; RV32-NEXT: # %bb.114: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB216_115: +; RV32-NEXT: bltu a0, a3, .LBB216_117 +; RV32-NEXT: # %bb.116: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB216_117: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t4, 396(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 88(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB216_119 +; RV32-NEXT: # %bb.118: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_119: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 396(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 112(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_413 +; RV32-NEXT: # %bb.120: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s3, .LBB216_414 +; RV32-NEXT: .LBB216_121: +; RV32-NEXT: bgeu a0, s11, .LBB216_415 +; RV32-NEXT: .LBB216_122: +; RV32-NEXT: bne a1, s3, .LBB216_416 +; RV32-NEXT: .LBB216_123: +; RV32-NEXT: bgeu a1, s3, .LBB216_417 +; RV32-NEXT: .LBB216_124: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s10, .LBB216_126 +; RV32-NEXT: .LBB216_125: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 336(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_126: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s11, 336(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s11, .LBB216_418 +; RV32-NEXT: # %bb.127: +; RV32-NEXT: bne a1, s10, .LBB216_419 +; RV32-NEXT: 
.LBB216_128: +; RV32-NEXT: bgeu a1, s10, .LBB216_420 +; RV32-NEXT: .LBB216_129: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, t1, .LBB216_131 +; RV32-NEXT: .LBB216_130: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 340(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_131: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s10, 340(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s10, .LBB216_421 +; RV32-NEXT: # %bb.132: +; RV32-NEXT: bne a1, t1, .LBB216_422 +; RV32-NEXT: .LBB216_133: +; RV32-NEXT: bgeu a1, t1, .LBB216_423 +; RV32-NEXT: .LBB216_134: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s5, .LBB216_136 +; RV32-NEXT: .LBB216_135: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 344(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_136: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t1, 344(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, t1, .LBB216_424 +; RV32-NEXT: # %bb.137: +; RV32-NEXT: bne a1, s5, .LBB216_425 +; RV32-NEXT: .LBB216_138: +; RV32-NEXT: mv t1, t2 +; RV32-NEXT: bgeu a1, s5, .LBB216_426 +; RV32-NEXT: .LBB216_139: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, t6, .LBB216_141 +; RV32-NEXT: .LBB216_140: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 348(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_141: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s5, 348(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s5, .LBB216_427 +; RV32-NEXT: # %bb.142: +; RV32-NEXT: bne a1, t6, .LBB216_428 +; RV32-NEXT: .LBB216_143: +; RV32-NEXT: bltu a1, t6, .LBB216_145 +; RV32-NEXT: .LBB216_144: +; RV32-NEXT: mv a1, t6 +; RV32-NEXT: .LBB216_145: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 400(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a4, .LBB216_147 +; RV32-NEXT: # %bb.146: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: .LBB216_147: +; RV32-NEXT: bltu a0, a5, .LBB216_149 +; RV32-NEXT: # %bb.148: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: .LBB216_149: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t4, 400(a4) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 124(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t4, .LBB216_151 +; RV32-NEXT: # %bb.150: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_151: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 400(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 128(a5) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_429 +; RV32-NEXT: # %bb.152: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s8, .LBB216_430 +; RV32-NEXT: .LBB216_153: +; RV32-NEXT: bgeu a0, a3, .LBB216_431 +; 
RV32-NEXT: .LBB216_154: +; RV32-NEXT: bne a1, s8, .LBB216_432 +; RV32-NEXT: .LBB216_155: +; RV32-NEXT: bgeu a1, s8, .LBB216_433 +; RV32-NEXT: .LBB216_156: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, t3, .LBB216_158 +; RV32-NEXT: .LBB216_157: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 352(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_158: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s9, 352(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s9, .LBB216_434 +; RV32-NEXT: # %bb.159: +; RV32-NEXT: bne a1, t3, .LBB216_435 +; RV32-NEXT: .LBB216_160: +; RV32-NEXT: bltu a1, t3, .LBB216_162 +; RV32-NEXT: .LBB216_161: +; RV32-NEXT: mv a1, t3 +; RV32-NEXT: .LBB216_162: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 404(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a3, .LBB216_164 +; RV32-NEXT: # %bb.163: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB216_164: +; RV32-NEXT: bltu a0, a4, .LBB216_166 +; RV32-NEXT: # %bb.165: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB216_166: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s10, 404(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 164(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s10, .LBB216_168 +; RV32-NEXT: # %bb.167: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_168: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 404(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 144(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_436 +; RV32-NEXT: # %bb.169: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a7, .LBB216_437 +; RV32-NEXT: .LBB216_170: +; RV32-NEXT: bgeu a0, a5, .LBB216_438 +; RV32-NEXT: .LBB216_171: +; RV32-NEXT: beq a1, a7, .LBB216_173 +; RV32-NEXT: .LBB216_172: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_173: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 156(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a7, .LBB216_175 +; RV32-NEXT: # %bb.174: +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: .LBB216_175: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s1, .LBB216_177 +; RV32-NEXT: # %bb.176: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 356(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_177: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s8, 356(a6) # 4-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s2, 196(a6) # 4-byte Folded Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw a7, 220(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s8, 
.LBB216_439 +; RV32-NEXT: # %bb.178: +; RV32-NEXT: bne a1, s1, .LBB216_440 +; RV32-NEXT: .LBB216_179: +; RV32-NEXT: bgeu a1, s1, .LBB216_441 +; RV32-NEXT: .LBB216_180: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s4, .LBB216_182 +; RV32-NEXT: .LBB216_181: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 360(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_182: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw t5, 360(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, t5, .LBB216_442 +; RV32-NEXT: # %bb.183: +; RV32-NEXT: bne a1, s4, .LBB216_443 +; RV32-NEXT: .LBB216_184: +; RV32-NEXT: bltu a1, s4, .LBB216_186 +; RV32-NEXT: .LBB216_185: +; RV32-NEXT: mv a1, s4 +; RV32-NEXT: .LBB216_186: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s0, 408(a6) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, s0, .LBB216_188 +; RV32-NEXT: # %bb.187: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB216_188: +; RV32-NEXT: bltu a0, a4, .LBB216_190 +; RV32-NEXT: # %bb.189: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB216_190: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw s11, 408(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s11, .LBB216_192 +; RV32-NEXT: # %bb.191: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_192: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 408(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 148(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_444 +; RV32-NEXT: # %bb.193: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, t0, .LBB216_445 +; RV32-NEXT: .LBB216_194: +; RV32-NEXT: bgeu a0, a4, .LBB216_446 +; RV32-NEXT: .LBB216_195: +; RV32-NEXT: beq a1, t0, .LBB216_197 +; RV32-NEXT: .LBB216_196: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_197: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 168(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, t0, .LBB216_199 +; RV32-NEXT: # %bb.198: +; RV32-NEXT: mv a1, t0 +; RV32-NEXT: .LBB216_199: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a5, .LBB216_201 +; RV32-NEXT: # %bb.200: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 364(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_201: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw t0, 364(a6) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, t0, .LBB216_203 +; RV32-NEXT: # %bb.202: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: .LBB216_203: +; RV32-NEXT: beq a1, a5, .LBB216_205 +; RV32-NEXT: # %bb.204: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_205: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t0, 200(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw 
s0, 184(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 204(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a5, .LBB216_207 +; RV32-NEXT: # %bb.206: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB216_207: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 160(a5) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a5, .LBB216_209 +; RV32-NEXT: # %bb.208: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 368(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_209: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s4, 368(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s4, .LBB216_447 +; RV32-NEXT: # %bb.210: +; RV32-NEXT: bne a1, a5, .LBB216_448 +; RV32-NEXT: .LBB216_211: +; RV32-NEXT: bltu a1, a5, .LBB216_213 +; RV32-NEXT: .LBB216_212: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB216_213: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw a5, 412(a5) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a5, .LBB216_215 +; RV32-NEXT: # %bb.214: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB216_215: +; RV32-NEXT: bltu a0, a3, .LBB216_217 +; RV32-NEXT: # %bb.216: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB216_217: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t5, 412(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 216(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t5, .LBB216_219 +; RV32-NEXT: # %bb.218: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_219: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 412(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 180(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_449 +; RV32-NEXT: # %bb.220: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, ra, .LBB216_450 +; RV32-NEXT: .LBB216_221: +; RV32-NEXT: bgeu a0, a4, .LBB216_451 +; RV32-NEXT: .LBB216_222: +; RV32-NEXT: beq a1, ra, .LBB216_224 +; RV32-NEXT: .LBB216_223: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_224: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 176(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, ra, .LBB216_226 +; RV32-NEXT: # %bb.225: +; RV32-NEXT: mv a1, ra +; RV32-NEXT: .LBB216_226: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a4, .LBB216_228 +; RV32-NEXT: # %bb.227: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a2, a2, a6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 372(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_228: +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a6, a6, t2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: lw s7, 372(a6) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s7, .LBB216_452 +; 
RV32-NEXT: # %bb.229: +; RV32-NEXT: bne a1, a4, .LBB216_453 +; RV32-NEXT: .LBB216_230: +; RV32-NEXT: bltu a1, a4, .LBB216_232 +; RV32-NEXT: .LBB216_231: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB216_232: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 416(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a4, .LBB216_234 +; RV32-NEXT: # %bb.233: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: .LBB216_234: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a6, 244(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a3, .LBB216_236 +; RV32-NEXT: # %bb.235: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: .LBB216_236: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s9, 416(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t5, 264(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 212(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, s9, .LBB216_238 +; RV32-NEXT: # %bb.237: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_238: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 416(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 188(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_454 +; RV32-NEXT: # %bb.239: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a3, .LBB216_455 +; RV32-NEXT: .LBB216_240: +; RV32-NEXT: bgeu a0, s0, .LBB216_456 +; RV32-NEXT: .LBB216_241: +; RV32-NEXT: bne a1, a3, .LBB216_457 +; RV32-NEXT: .LBB216_242: +; RV32-NEXT: bltu a1, a3, .LBB216_244 +; RV32-NEXT: .LBB216_243: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB216_244: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 192(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a3, .LBB216_246 +; RV32-NEXT: # %bb.245: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a2, a2, t2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 376(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_246: +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: addi t3, zero, 232 +; RV32-NEXT: mul t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: lw s11, 376(t2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, s11, .LBB216_458 +; RV32-NEXT: # %bb.247: +; RV32-NEXT: bne a1, a3, .LBB216_459 +; RV32-NEXT: .LBB216_248: +; RV32-NEXT: bltu a1, a3, .LBB216_250 +; RV32-NEXT: .LBB216_249: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB216_250: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 420(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a3, .LBB216_252 +; RV32-NEXT: # %bb.251: +; RV32-NEXT: mv a2, s2 +; RV32-NEXT: .LBB216_252: +; RV32-NEXT: bltu a0, s2, .LBB216_254 +; RV32-NEXT: # %bb.253: +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: .LBB216_254: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; 
RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t2, 420(a3) # 4-byte Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t3, zero, 232 +; RV32-NEXT: mul a3, a3, t3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw t4, 232(a3) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t2, .LBB216_256 +; RV32-NEXT: # %bb.255: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_256: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 420(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a2, .LBB216_258 +; RV32-NEXT: # %bb.257: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB216_258: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 228(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, s1, .LBB216_460 +; RV32-NEXT: # %bb.259: +; RV32-NEXT: bgeu a0, t0, .LBB216_461 +; RV32-NEXT: .LBB216_260: +; RV32-NEXT: bne a1, s1, .LBB216_462 +; RV32-NEXT: .LBB216_261: +; RV32-NEXT: bgeu a1, s1, .LBB216_463 +; RV32-NEXT: .LBB216_262: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, t1, .LBB216_264 +; RV32-NEXT: .LBB216_263: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a2, a2, t0 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 380(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_264: +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul t0, t0, t2 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: lw t0, 380(t0) # 4-byte Folded Reload +; RV32-NEXT: bgeu a0, t0, .LBB216_464 +; RV32-NEXT: # %bb.265: +; RV32-NEXT: bne a1, t1, .LBB216_465 +; RV32-NEXT: .LBB216_266: +; RV32-NEXT: bltu a1, t1, .LBB216_268 +; RV32-NEXT: .LBB216_267: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: .LBB216_268: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: addi t1, zero, 232 +; RV32-NEXT: mul t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: lw s0, 424(t0) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, s0, .LBB216_270 +; RV32-NEXT: # %bb.269: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB216_270: +; RV32-NEXT: bltu a0, a4, .LBB216_272 +; RV32-NEXT: # %bb.271: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB216_272: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a4, a4, t0 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw t3, 424(a4) # 4-byte Folded Reload +; RV32-NEXT: beq a1, t3, .LBB216_274 +; RV32-NEXT: # %bb.273: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_274: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 424(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi t0, zero, 232 +; RV32-NEXT: mul a4, a4, t0 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 224(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a2, .LBB216_466 +; RV32-NEXT: # %bb.275: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a7, .LBB216_467 +; RV32-NEXT: .LBB216_276: +; RV32-NEXT: bgeu a0, a5, .LBB216_468 +; RV32-NEXT: .LBB216_277: +; RV32-NEXT: bne a1, a7, .LBB216_469 +; RV32-NEXT: .LBB216_278: +; RV32-NEXT: bgeu a1, a7, .LBB216_470 +; RV32-NEXT: .LBB216_279: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a4, .LBB216_281 +; RV32-NEXT: .LBB216_280: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 384(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_281: +; RV32-NEXT: 
csrr a5, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a5, a5, a7 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw t1, 384(a5) # 4-byte Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a5, a5, a7 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: lw t0, 312(a5) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, t1, .LBB216_283 +; RV32-NEXT: # %bb.282: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: .LBB216_283: +; RV32-NEXT: beq a1, a4, .LBB216_285 +; RV32-NEXT: # %bb.284: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_285: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 236(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a4, .LBB216_287 +; RV32-NEXT: # %bb.286: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB216_287: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a3, .LBB216_289 +; RV32-NEXT: # %bb.288: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a2, 388(a2) # 4-byte Folded Reload +; RV32-NEXT: .LBB216_289: +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a7, 388(a4) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a7, .LBB216_291 +; RV32-NEXT: # %bb.290: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: .LBB216_291: +; RV32-NEXT: beq a1, a3, .LBB216_293 +; RV32-NEXT: # %bb.292: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_293: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw t1, 256(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s0, 260(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a7, 272(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 252(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 248(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a3, .LBB216_471 +; RV32-NEXT: # %bb.294: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s1, .LBB216_472 +; RV32-NEXT: .LBB216_295: +; RV32-NEXT: bgeu a0, t4, .LBB216_473 +; RV32-NEXT: .LBB216_296: +; RV32-NEXT: beq a1, s1, .LBB216_298 +; RV32-NEXT: .LBB216_297: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_298: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 240(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, s1, .LBB216_474 +; RV32-NEXT: # %bb.299: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a6, .LBB216_475 +; RV32-NEXT: .LBB216_300: +; RV32-NEXT: bgeu a0, a3, .LBB216_476 +; RV32-NEXT: .LBB216_301: +; RV32-NEXT: bne a1, a6, .LBB216_477 +; RV32-NEXT: .LBB216_302: +; RV32-NEXT: bgeu a1, a6, .LBB216_478 +; RV32-NEXT: .LBB216_303: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a4, .LBB216_305 +; RV32-NEXT: .LBB216_304: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: .LBB216_305: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a6, zero, 232 +; RV32-NEXT: mul a3, a3, a6 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a6, 304(a3) # 4-byte 
Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi t2, zero, 232 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a3, 284(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a5, .LBB216_307 +; RV32-NEXT: # %bb.306: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: .LBB216_307: +; RV32-NEXT: beq a1, a4, .LBB216_309 +; RV32-NEXT: # %bb.308: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_309: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 268(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a4, .LBB216_479 +; RV32-NEXT: # %bb.310: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s0, .LBB216_480 +; RV32-NEXT: .LBB216_311: +; RV32-NEXT: bgeu a0, t1, .LBB216_481 +; RV32-NEXT: .LBB216_312: +; RV32-NEXT: bne a1, s0, .LBB216_482 +; RV32-NEXT: .LBB216_313: +; RV32-NEXT: bgeu a1, s0, .LBB216_483 +; RV32-NEXT: .LBB216_314: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a5, .LBB216_484 +; RV32-NEXT: .LBB216_315: +; RV32-NEXT: bgeu a0, t5, .LBB216_485 +; RV32-NEXT: .LBB216_316: +; RV32-NEXT: beq a1, a5, .LBB216_318 +; RV32-NEXT: .LBB216_317: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_318: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 276(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a5, .LBB216_486 +; RV32-NEXT: # %bb.319: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a4, .LBB216_487 +; RV32-NEXT: .LBB216_320: +; RV32-NEXT: bgeu a0, a7, .LBB216_488 +; RV32-NEXT: .LBB216_321: +; RV32-NEXT: beq a1, a4, .LBB216_323 +; RV32-NEXT: .LBB216_322: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_323: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw s1, 296(a2) # 4-byte Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a2, a2, a5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a5, 292(a2) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a4, .LBB216_325 +; RV32-NEXT: # %bb.324: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: .LBB216_325: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a4, a4, a7 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: lw a4, 280(a4) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a3, .LBB216_489 +; RV32-NEXT: # %bb.326: +; RV32-NEXT: bgeu a0, a4, .LBB216_490 +; RV32-NEXT: .LBB216_327: +; RV32-NEXT: bne a1, a3, .LBB216_491 +; RV32-NEXT: .LBB216_328: +; RV32-NEXT: bltu a1, a3, .LBB216_330 +; RV32-NEXT: .LBB216_329: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB216_330: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a4, 288(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a1, a5, .LBB216_332 +; RV32-NEXT: # %bb.331: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: .LBB216_332: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a7, zero, 232 +; RV32-NEXT: mul a3, a3, a7 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw s0, 316(a3) # 4-byte Folded Reload +; RV32-NEXT: bltu a0, a4, .LBB216_334 +; RV32-NEXT: # %bb.333: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: .LBB216_334: +; RV32-NEXT: beq a1, a5, .LBB216_336 +; RV32-NEXT: # %bb.335: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_336: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a4, 320(a2) # 4-byte 
Folded Reload +; RV32-NEXT: bltu a1, a5, .LBB216_338 +; RV32-NEXT: # %bb.337: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB216_338: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 300(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a5, .LBB216_492 +; RV32-NEXT: # %bb.339: +; RV32-NEXT: bgeu a0, s1, .LBB216_493 +; RV32-NEXT: .LBB216_340: +; RV32-NEXT: bne a1, a5, .LBB216_494 +; RV32-NEXT: .LBB216_341: +; RV32-NEXT: bltu a1, a5, .LBB216_343 +; RV32-NEXT: .LBB216_342: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB216_343: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 308(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a5, .LBB216_495 +; RV32-NEXT: # %bb.344: +; RV32-NEXT: bgeu a0, a6, .LBB216_496 +; RV32-NEXT: .LBB216_345: +; RV32-NEXT: beq a1, a5, .LBB216_347 +; RV32-NEXT: .LBB216_346: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB216_347: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 324(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a1, a5, .LBB216_497 +; RV32-NEXT: # %bb.348: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s0, .LBB216_498 +; RV32-NEXT: .LBB216_349: +; RV32-NEXT: bgeu a0, t0, .LBB216_499 +; RV32-NEXT: .LBB216_350: +; RV32-NEXT: bne a1, s0, .LBB216_500 +; RV32-NEXT: .LBB216_351: +; RV32-NEXT: bgeu a1, s0, .LBB216_501 +; RV32-NEXT: .LBB216_352: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a3, .LBB216_502 +; RV32-NEXT: .LBB216_353: +; RV32-NEXT: bgeu a0, a4, .LBB216_503 +; RV32-NEXT: .LBB216_354: +; RV32-NEXT: bne a1, a3, .LBB216_504 +; RV32-NEXT: .LBB216_355: +; RV32-NEXT: bltu a1, a3, .LBB216_357 +; RV32-NEXT: .LBB216_356: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB216_357: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 428(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 432(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 436(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 440(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 444(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 448(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 452(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 456(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 460(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 464(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 468(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 472(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 476(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 480 +; RV32-NEXT: ret +; RV32-NEXT: .LBB216_358: +; RV32-NEXT: mv s4, t6 +; RV32-NEXT: bgeu a0, t6, .LBB216_359 +; RV32-NEXT: j .LBB216_2 +; RV32-NEXT: .LBB216_359: +; RV32-NEXT: mv a0, t6 +; RV32-NEXT: bne a1, a3, .LBB216_360 +; RV32-NEXT: j .LBB216_3 +; RV32-NEXT: .LBB216_360: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: bgeu a1, a3, .LBB216_361 +; RV32-NEXT: j .LBB216_4 +; RV32-NEXT: .LBB216_361: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: bgeu a1, a4, .LBB216_362 +; RV32-NEXT: j .LBB216_5 +; RV32-NEXT: .LBB216_362: +; RV32-NEXT: mv a3, t4 +; RV32-NEXT: bgeu a0, t4, .LBB216_363 +; RV32-NEXT: j .LBB216_6 +; RV32-NEXT: .LBB216_363: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: beq a1, a4, .LBB216_505 +; RV32-NEXT: j .LBB216_7 +; RV32-NEXT: .LBB216_505: +; RV32-NEXT: j 
.LBB216_8 +; RV32-NEXT: .LBB216_364: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bgeu a1, s10, .LBB216_365 +; RV32-NEXT: j .LBB216_10 +; RV32-NEXT: .LBB216_365: +; RV32-NEXT: mv a4, a7 +; RV32-NEXT: bgeu a0, a7, .LBB216_366 +; RV32-NEXT: j .LBB216_11 +; RV32-NEXT: .LBB216_366: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, s10, .LBB216_367 +; RV32-NEXT: j .LBB216_12 +; RV32-NEXT: .LBB216_367: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu a1, s10, .LBB216_368 +; RV32-NEXT: j .LBB216_13 +; RV32-NEXT: .LBB216_368: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, s3, .LBB216_506 +; RV32-NEXT: j .LBB216_14 +; RV32-NEXT: .LBB216_506: +; RV32-NEXT: j .LBB216_15 +; RV32-NEXT: .LBB216_369: +; RV32-NEXT: mv a0, s6 +; RV32-NEXT: bne a1, s3, .LBB216_370 +; RV32-NEXT: j .LBB216_17 +; RV32-NEXT: .LBB216_370: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu a1, s3, .LBB216_371 +; RV32-NEXT: j .LBB216_18 +; RV32-NEXT: .LBB216_371: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bgeu a1, a5, .LBB216_372 +; RV32-NEXT: j .LBB216_19 +; RV32-NEXT: .LBB216_372: +; RV32-NEXT: mv a4, t5 +; RV32-NEXT: bgeu a0, t5, .LBB216_373 +; RV32-NEXT: j .LBB216_20 +; RV32-NEXT: .LBB216_373: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB216_374 +; RV32-NEXT: j .LBB216_21 +; RV32-NEXT: .LBB216_374: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu a1, a5, .LBB216_375 +; RV32-NEXT: j .LBB216_22 +; RV32-NEXT: .LBB216_375: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bgeu a1, t1, .LBB216_376 +; RV32-NEXT: j .LBB216_23 +; RV32-NEXT: .LBB216_376: +; RV32-NEXT: mv a4, s5 +; RV32-NEXT: bgeu a0, s5, .LBB216_377 +; RV32-NEXT: j .LBB216_24 +; RV32-NEXT: .LBB216_377: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t1, .LBB216_507 +; RV32-NEXT: j .LBB216_25 +; RV32-NEXT: .LBB216_507: +; RV32-NEXT: j .LBB216_26 +; RV32-NEXT: .LBB216_378: +; RV32-NEXT: mv a0, ra +; RV32-NEXT: bne a1, s1, .LBB216_379 +; RV32-NEXT: j .LBB216_32 +; RV32-NEXT: .LBB216_379: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu a1, s1, .LBB216_380 +; RV32-NEXT: j .LBB216_33 +; RV32-NEXT: .LBB216_380: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, s11, .LBB216_508 +; RV32-NEXT: j .LBB216_34 +; RV32-NEXT: .LBB216_508: +; RV32-NEXT: j .LBB216_35 +; RV32-NEXT: .LBB216_381: +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: bne a1, t3, .LBB216_382 +; RV32-NEXT: j .LBB216_45 +; RV32-NEXT: .LBB216_382: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bltu a1, t3, .LBB216_509 +; RV32-NEXT: j .LBB216_46 +; RV32-NEXT: .LBB216_509: +; RV32-NEXT: j .LBB216_47 +; RV32-NEXT: .LBB216_383: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, a6, .LBB216_384 +; RV32-NEXT: j .LBB216_59 +; RV32-NEXT: .LBB216_384: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: bgeu a1, a6, .LBB216_385 +; RV32-NEXT: j .LBB216_60 +; RV32-NEXT: .LBB216_385: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu a1, t2, .LBB216_510 +; RV32-NEXT: j .LBB216_61 +; RV32-NEXT: .LBB216_510: +; RV32-NEXT: j .LBB216_62 +; RV32-NEXT: .LBB216_386: +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: bne a1, t2, .LBB216_387 +; RV32-NEXT: j .LBB216_64 +; RV32-NEXT: .LBB216_387: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bltu a1, t2, .LBB216_511 +; RV32-NEXT: j .LBB216_65 +; RV32-NEXT: .LBB216_511: +; RV32-NEXT: j .LBB216_66 +; RV32-NEXT: .LBB216_388: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bgeu a0, a5, .LBB216_389 +; RV32-NEXT: j .LBB216_68 +; RV32-NEXT: .LBB216_389: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bne a1, s1, .LBB216_390 +; RV32-NEXT: j .LBB216_69 +; RV32-NEXT: .LBB216_390: +; 
RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, s1, .LBB216_391 +; RV32-NEXT: j .LBB216_70 +; RV32-NEXT: .LBB216_391: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: bltu a1, s0, .LBB216_512 +; RV32-NEXT: j .LBB216_71 +; RV32-NEXT: .LBB216_512: +; RV32-NEXT: j .LBB216_72 +; RV32-NEXT: .LBB216_392: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, s0, .LBB216_393 +; RV32-NEXT: j .LBB216_74 +; RV32-NEXT: .LBB216_393: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bgeu a1, s0, .LBB216_394 +; RV32-NEXT: j .LBB216_75 +; RV32-NEXT: .LBB216_394: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: bltu a1, s1, .LBB216_513 +; RV32-NEXT: j .LBB216_76 +; RV32-NEXT: .LBB216_513: +; RV32-NEXT: j .LBB216_77 +; RV32-NEXT: .LBB216_395: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beq a1, s1, .LBB216_79 +; RV32-NEXT: .LBB216_396: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: bltu a1, s1, .LBB216_80 +; RV32-NEXT: .LBB216_397: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, t5, .LBB216_81 +; RV32-NEXT: j .LBB216_82 +; RV32-NEXT: .LBB216_398: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t5, .LBB216_84 +; RV32-NEXT: .LBB216_399: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, t5, .LBB216_85 +; RV32-NEXT: j .LBB216_86 +; RV32-NEXT: .LBB216_400: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a5, .LBB216_94 +; RV32-NEXT: .LBB216_401: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a0, a3, .LBB216_95 +; RV32-NEXT: .LBB216_402: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: bne a1, a5, .LBB216_96 +; RV32-NEXT: j .LBB216_97 +; RV32-NEXT: .LBB216_403: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a4, .LBB216_99 +; RV32-NEXT: .LBB216_404: +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: bltu a0, a6, .LBB216_100 +; RV32-NEXT: .LBB216_405: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, a4, .LBB216_101 +; RV32-NEXT: j .LBB216_102 +; RV32-NEXT: .LBB216_406: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s2, .LBB216_104 +; RV32-NEXT: .LBB216_407: +; RV32-NEXT: mv a2, s7 +; RV32-NEXT: bltu a0, s7, .LBB216_105 +; RV32-NEXT: .LBB216_408: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s2, .LBB216_106 +; RV32-NEXT: .LBB216_409: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s2, .LBB216_107 +; RV32-NEXT: .LBB216_410: +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s6, .LBB216_108 +; RV32-NEXT: j .LBB216_109 +; RV32-NEXT: .LBB216_411: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s6, .LBB216_111 +; RV32-NEXT: .LBB216_412: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, s6, .LBB216_112 +; RV32-NEXT: j .LBB216_113 +; RV32-NEXT: .LBB216_413: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s3, .LBB216_121 +; RV32-NEXT: .LBB216_414: +; RV32-NEXT: mv a2, s11 +; RV32-NEXT: bltu a0, s11, .LBB216_122 +; RV32-NEXT: .LBB216_415: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s3, .LBB216_123 +; RV32-NEXT: .LBB216_416: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s3, .LBB216_124 +; RV32-NEXT: .LBB216_417: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s10, .LBB216_125 +; RV32-NEXT: j .LBB216_126 +; RV32-NEXT: .LBB216_418: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s10, .LBB216_128 +; RV32-NEXT: .LBB216_419: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s10, .LBB216_129 +; RV32-NEXT: .LBB216_420: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, t1, .LBB216_130 +; RV32-NEXT: j .LBB216_131 +; RV32-NEXT: .LBB216_421: +; RV32-NEXT: mv a0, s10 +; RV32-NEXT: beq a1, t1, .LBB216_133 +; 
RV32-NEXT: .LBB216_422: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, t1, .LBB216_134 +; RV32-NEXT: .LBB216_423: +; RV32-NEXT: mv a1, t1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s5, .LBB216_135 +; RV32-NEXT: j .LBB216_136 +; RV32-NEXT: .LBB216_424: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s5, .LBB216_138 +; RV32-NEXT: .LBB216_425: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv t1, t2 +; RV32-NEXT: bltu a1, s5, .LBB216_139 +; RV32-NEXT: .LBB216_426: +; RV32-NEXT: mv a1, s5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, t6, .LBB216_140 +; RV32-NEXT: j .LBB216_141 +; RV32-NEXT: .LBB216_427: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t6, .LBB216_143 +; RV32-NEXT: .LBB216_428: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, t6, .LBB216_144 +; RV32-NEXT: j .LBB216_145 +; RV32-NEXT: .LBB216_429: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s8, .LBB216_153 +; RV32-NEXT: .LBB216_430: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a0, a3, .LBB216_154 +; RV32-NEXT: .LBB216_431: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, s8, .LBB216_155 +; RV32-NEXT: .LBB216_432: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s8, .LBB216_156 +; RV32-NEXT: .LBB216_433: +; RV32-NEXT: mv a1, s8 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, t3, .LBB216_157 +; RV32-NEXT: j .LBB216_158 +; RV32-NEXT: .LBB216_434: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t3, .LBB216_160 +; RV32-NEXT: .LBB216_435: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, t3, .LBB216_161 +; RV32-NEXT: j .LBB216_162 +; RV32-NEXT: .LBB216_436: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a7, .LBB216_170 +; RV32-NEXT: .LBB216_437: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a0, a5, .LBB216_171 +; RV32-NEXT: .LBB216_438: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bne a1, a7, .LBB216_172 +; RV32-NEXT: j .LBB216_173 +; RV32-NEXT: .LBB216_439: +; RV32-NEXT: mv a0, s8 +; RV32-NEXT: beq a1, s1, .LBB216_179 +; RV32-NEXT: .LBB216_440: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s1, .LBB216_180 +; RV32-NEXT: .LBB216_441: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, s4, .LBB216_181 +; RV32-NEXT: j .LBB216_182 +; RV32-NEXT: .LBB216_442: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: beq a1, s4, .LBB216_184 +; RV32-NEXT: .LBB216_443: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, s4, .LBB216_185 +; RV32-NEXT: j .LBB216_186 +; RV32-NEXT: .LBB216_444: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, t0, .LBB216_194 +; RV32-NEXT: .LBB216_445: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a0, a4, .LBB216_195 +; RV32-NEXT: .LBB216_446: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, t0, .LBB216_196 +; RV32-NEXT: j .LBB216_197 +; RV32-NEXT: .LBB216_447: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: beq a1, a5, .LBB216_211 +; RV32-NEXT: .LBB216_448: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, a5, .LBB216_212 +; RV32-NEXT: j .LBB216_213 +; RV32-NEXT: .LBB216_449: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, ra, .LBB216_221 +; RV32-NEXT: .LBB216_450: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a0, a4, .LBB216_222 +; RV32-NEXT: .LBB216_451: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, ra, .LBB216_223 +; RV32-NEXT: j .LBB216_224 +; RV32-NEXT: .LBB216_452: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, a4, .LBB216_230 +; RV32-NEXT: .LBB216_453: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, a4, .LBB216_231 +; RV32-NEXT: j .LBB216_232 +; RV32-NEXT: .LBB216_454: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a3, .LBB216_240 +; RV32-NEXT: 
.LBB216_455: +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: bltu a0, s0, .LBB216_241 +; RV32-NEXT: .LBB216_456: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: beq a1, a3, .LBB216_242 +; RV32-NEXT: .LBB216_457: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, a3, .LBB216_243 +; RV32-NEXT: j .LBB216_244 +; RV32-NEXT: .LBB216_458: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, a3, .LBB216_248 +; RV32-NEXT: .LBB216_459: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, a3, .LBB216_249 +; RV32-NEXT: j .LBB216_250 +; RV32-NEXT: .LBB216_460: +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: bltu a0, t0, .LBB216_260 +; RV32-NEXT: .LBB216_461: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, s1, .LBB216_261 +; RV32-NEXT: .LBB216_462: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s1, .LBB216_262 +; RV32-NEXT: .LBB216_463: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, t1, .LBB216_263 +; RV32-NEXT: j .LBB216_264 +; RV32-NEXT: .LBB216_464: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, t1, .LBB216_266 +; RV32-NEXT: .LBB216_465: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, t1, .LBB216_267 +; RV32-NEXT: j .LBB216_268 +; RV32-NEXT: .LBB216_466: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a7, .LBB216_276 +; RV32-NEXT: .LBB216_467: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a0, a5, .LBB216_277 +; RV32-NEXT: .LBB216_468: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: beq a1, a7, .LBB216_278 +; RV32-NEXT: .LBB216_469: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, a7, .LBB216_279 +; RV32-NEXT: .LBB216_470: +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a4, .LBB216_280 +; RV32-NEXT: j .LBB216_281 +; RV32-NEXT: .LBB216_471: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s1, .LBB216_295 +; RV32-NEXT: .LBB216_472: +; RV32-NEXT: mv a2, t4 +; RV32-NEXT: bltu a0, t4, .LBB216_296 +; RV32-NEXT: .LBB216_473: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: bne a1, s1, .LBB216_297 +; RV32-NEXT: j .LBB216_298 +; RV32-NEXT: .LBB216_474: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a6, .LBB216_300 +; RV32-NEXT: .LBB216_475: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a0, a3, .LBB216_301 +; RV32-NEXT: .LBB216_476: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, a6, .LBB216_302 +; RV32-NEXT: .LBB216_477: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, a6, .LBB216_303 +; RV32-NEXT: .LBB216_478: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a1, a4, .LBB216_304 +; RV32-NEXT: j .LBB216_305 +; RV32-NEXT: .LBB216_479: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s0, .LBB216_311 +; RV32-NEXT: .LBB216_480: +; RV32-NEXT: mv a2, t1 +; RV32-NEXT: bltu a0, t1, .LBB216_312 +; RV32-NEXT: .LBB216_481: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s0, .LBB216_313 +; RV32-NEXT: .LBB216_482: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s0, .LBB216_314 +; RV32-NEXT: .LBB216_483: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a5, .LBB216_315 +; RV32-NEXT: .LBB216_484: +; RV32-NEXT: mv a2, t5 +; RV32-NEXT: bltu a0, t5, .LBB216_316 +; RV32-NEXT: .LBB216_485: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB216_317 +; RV32-NEXT: j .LBB216_318 +; RV32-NEXT: .LBB216_486: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a4, .LBB216_320 +; RV32-NEXT: .LBB216_487: +; RV32-NEXT: mv a2, a7 +; RV32-NEXT: bltu a0, a7, .LBB216_321 +; RV32-NEXT: .LBB216_488: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, a4, .LBB216_322 +; RV32-NEXT: j .LBB216_323 +; RV32-NEXT: .LBB216_489: +; RV32-NEXT: mv a2, 
a4 +; RV32-NEXT: bltu a0, a4, .LBB216_327 +; RV32-NEXT: .LBB216_490: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: beq a1, a3, .LBB216_328 +; RV32-NEXT: .LBB216_491: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, a3, .LBB216_329 +; RV32-NEXT: j .LBB216_330 +; RV32-NEXT: .LBB216_492: +; RV32-NEXT: mv a2, s1 +; RV32-NEXT: bltu a0, s1, .LBB216_340 +; RV32-NEXT: .LBB216_493: +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: beq a1, a5, .LBB216_341 +; RV32-NEXT: .LBB216_494: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, a5, .LBB216_342 +; RV32-NEXT: j .LBB216_343 +; RV32-NEXT: .LBB216_495: +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: bltu a0, a6, .LBB216_345 +; RV32-NEXT: .LBB216_496: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, a5, .LBB216_346 +; RV32-NEXT: j .LBB216_347 +; RV32-NEXT: .LBB216_497: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, s0, .LBB216_349 +; RV32-NEXT: .LBB216_498: +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: bltu a0, t0, .LBB216_350 +; RV32-NEXT: .LBB216_499: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, s0, .LBB216_351 +; RV32-NEXT: .LBB216_500: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a1, s0, .LBB216_352 +; RV32-NEXT: .LBB216_501: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a1, a3, .LBB216_353 +; RV32-NEXT: .LBB216_502: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a0, a4, .LBB216_354 +; RV32-NEXT: .LBB216_503: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: beq a1, a3, .LBB216_355 +; RV32-NEXT: .LBB216_504: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a1, a3, .LBB216_356 +; RV32-NEXT: j .LBB216_357 +; +; RV64-LABEL: vreduce_umin_v64i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a1, a0, 384 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vminu.vv v16, v24, v16 +; RV64-NEXT: vminu.vv v8, v8, v0 +; RV64-NEXT: vminu.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, -1 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredminu.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <64 x i64>, <64 x i64>* %x + %red = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> %v) + ret i64 %red +} + +declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>) + +define i8 @vreduce_umax_v1i8(<1 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i8>, <1 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>) + +define i8 @vreduce_umax_v2i8(<2 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i8>, <2 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>) + +define i8 @vreduce_umax_v4i8(<4 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, 
zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i8>, <4 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) + +define i8 @vreduce_umax_v8i8(<8 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i8>, <8 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) + +define i8 @vreduce_umax_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i8>, <16 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>) + +define i8 @vreduce_umax_v32i8(<32 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; CHECK-NEXT: vle8.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i8>, <32 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>) + +define i8 @vreduce_umax_v64i8(<64 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e8,m4,ta,mu +; CHECK-NEXT: vle8.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i8>, <64 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>) + +define i8 @vreduce_umax_v128i8(<128 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i8>, <128 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> %v) + ret i8 %red +} + +declare i8 @llvm.vector.reduce.umax.v256i8(<256 x i8>) + +define i8 @vreduce_umax_v256i8(<256 x i8>* %x) { +; CHECK-LABEL: vreduce_umax_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 128 +; CHECK-NEXT: 
vsetvli a2, a1, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <256 x i8>, <256 x i8>* %x + %red = call i8 @llvm.vector.reduce.umax.v256i8(<256 x i8> %v) + ret i8 %red +} + +declare i16 @llvm.vector.reduce.umax.v1i16(<1 x i16>) + +define i16 @vreduce_umax_v1i16(<1 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i16>, <1 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>) + +define i16 @vreduce_umax_v2i16(<2 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i16>, <2 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) + +define i16 @vreduce_umax_v4i16(<4 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i16>, <4 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) + +define i16 @vreduce_umax_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i16>, <8 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>) + +define i16 @vreduce_umax_v16i16(<16 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; CHECK-NEXT: vle16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i16>, <16 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>) + +define i16 @vreduce_umax_v32i16(<32 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e16,m4,ta,mu +; CHECK-NEXT: vle16.v v28, (a0) 
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i16>, <32 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>) + +define i16 @vreduce_umax_v64i16(<64 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i16>, <64 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> %v) + ret i16 %red +} + +declare i16 @llvm.vector.reduce.umax.v128i16(<128 x i16>) + +define i16 @vreduce_umax_v128i16(<128 x i16>* %x) { +; CHECK-LABEL: vreduce_umax_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 64 +; CHECK-NEXT: vsetvli a2, a1, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <128 x i16>, <128 x i16>* %x + %red = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> %v) + ret i16 %red +} + +declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32>) + +define i32 @vreduce_umax_v1i32(<1 x i32>* %x) { +; CHECK-LABEL: vreduce_umax_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <1 x i32>, <1 x i32>* %x + %red = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) + +define i32 @vreduce_umax_v2i32(<2 x i32>* %x) { +; CHECK-LABEL: vreduce_umax_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <2 x i32>, <2 x i32>* %x + %red = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) + +define i32 @vreduce_umax_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: vreduce_umax_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v25, v26 +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <4 x i32>, <4 x i32>* %x + %red = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) + +define i32 @vreduce_umax_v8i32(<8 x i32>* %x) { +; CHECK-LABEL: vreduce_umax_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, 
e32,m2,ta,mu +; CHECK-NEXT: vle32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v26, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <8 x i32>, <8 x i32>* %x + %red = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>) + +define i32 @vreduce_umax_v16i32(<16 x i32>* %x) { +; CHECK-LABEL: vreduce_umax_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; CHECK-NEXT: vle32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v28, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <16 x i32>, <16 x i32>* %x + %red = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>) + +define i32 @vreduce_umax_v32i32(<32 x i32>* %x) { +; CHECK-LABEL: vreduce_umax_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <32 x i32>, <32 x i32>* %x + %red = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> %v) + ret i32 %red +} + +declare i32 @llvm.vector.reduce.umax.v64i32(<64 x i32>) + +define i32 @vreduce_umax_v64i32(<64 x i32>* %x) { +; CHECK-LABEL: vreduce_umax_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; CHECK-NEXT: vredmaxu.vs v25, v8, v25 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: ret + %v = load <64 x i32>, <64 x i32>* %x + %red = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> %v) + ret i32 %red +} + +declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>) + +define i64 @vreduce_umax_v1i64(<1 x i64>* %x) { +; RV32-LABEL: vreduce_umax_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umax_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <1 x i64>, <1 x i64>* %x + %red = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) + +define i64 @vreduce_umax_v2i64(<2 x i64>* %x) { +; RV32-LABEL: vreduce_umax_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v26, 0 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vredmaxu.vs v25, v25, v26 +; RV32-NEXT: 
vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umax_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v26, 0 +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV64-NEXT: vredmaxu.vs v25, v25, v26 +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <2 x i64>, <2 x i64>* %x + %red = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) + +define i64 @vreduce_umax_v4i64(<4 x i64>* %x) { +; RV32-LABEL: vreduce_umax_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v26, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vredmaxu.vs v25, v26, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umax_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v26, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vredmaxu.vs v25, v26, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <4 x i64>, <4 x i64>* %x + %red = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>) + +define i64 @vreduce_umax_v8i64(<8 x i64>* %x) { +; RV32-LABEL: vreduce_umax_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v28, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vredmaxu.vs v25, v28, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umax_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v28, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vredmaxu.vs v25, v28, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <8 x i64>, <8 x i64>* %x + %red = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>) + +define i64 @vreduce_umax_v16i64(<16 x i64>* %x) { +; RV32-LABEL: vreduce_umax_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vredmaxu.vs v25, v8, v25 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: 
vreduce_umax_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmaxu.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <16 x i64>, <16 x i64>* %x + %red = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.v32i64(<32 x i64>) + +define i64 @vreduce_umax_v32i64(<32 x i64>* %x) { +; RV32-LABEL: vreduce_umax_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vmaxu.vv v8, v8, v16 +; RV32-NEXT: vsetivli a0, 8, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vmaxu.vv v28, v8, v16 +; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v8, v28, 4 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vmaxu.vv v26, v28, v8 +; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 2 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vmaxu.vv v25, v26, v28 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslidedown.vi v26, v25, 1 +; RV32-NEXT: vmaxu.vv v25, v25, v26 +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsrl.vx v25, v25, a1 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_umax_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vmaxu.vv v8, v8, v16 +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.v.i v25, 0 +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vredmaxu.vs v25, v8, v25 +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %v = load <32 x i64>, <32 x i64>* %x + %red = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> %v) + ret i64 %red +} + +declare i64 @llvm.vector.reduce.umax.v64i64(<64 x i64>) + +define i64 @vreduce_umax_v64i64(<64 x i64>* %x) nounwind { +; RV32-LABEL: vreduce_umax_v64i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -480 +; RV32-NEXT: sw ra, 476(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 472(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 468(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 464(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 460(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 456(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 452(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 448(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 444(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 440(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 436(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 432(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 428(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: addi a2, zero, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: addi a2, a0, 384 +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a3, a1, e32,m8,ta,mu +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; 
RV32-NEXT: sw a2, 320(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 324(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 312(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 316(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 304(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 308(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 296(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 300(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 288(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 292(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 280(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 284(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 272(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 276(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 264(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 
+; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 268(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 256(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 260(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 248(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 252(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 240(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 244(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 232(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 236(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 388(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 228(a3) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; 
RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 384(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 224(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 216(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 220(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 212(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 424(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 256 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded 
Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, 
sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 380(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 208(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 200(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 204(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 196(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 420(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 376(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; 
RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 192(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 184(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 188(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 144 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 180(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 136 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 416(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 7 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 372(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 120 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 176(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 112 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 168(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 104 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 172(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 96 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 164(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, 
zero, 88 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 412(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 80 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 368(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 160(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 6 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 364(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 156(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 148(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 152(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 144(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 408(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; 
RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 360(a3) # 4-byte Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 140(a3) # 4-byte Folded Spill +; RV32-NEXT: vl8re8.v v0, (sp) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 356(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 136(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 128(a3) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a4, zero, 232 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: sw a2, 132(a3) # 4-byte Folded Spill +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: addi a2, a0, 128 +; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vsetivli a2, 1, e32,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 224 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 216 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 208 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 200 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 192 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 184 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 176 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 168 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 160 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 152 +; RV32-NEXT: 
[Elided: a long run of auto-generated `+; RV32-NEXT:` CHECK lines added for this test function (.LBB247). The original one-directive-per-line diff structure was lost in extraction, and the expectations continue in the same machine-generated pattern throughout: vslidedown.vi / vmv.x.s element extraction, heavy vs8r.v / vl8re8.v and sw / lw "Folded Spill" / "Folded Reload" stack traffic addressed via csrr vlenb, and a long bltu / bgeu / beq compare-and-branch chain that picks the reduction result element by element, consistent with a wide integer min/max reduction that is still expanded scalarwise on RV32.]
RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 300(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a5, a1, .LBB247_492 +; RV32-NEXT: # %bb.339: +; RV32-NEXT: bgeu s1, a0, .LBB247_493 +; RV32-NEXT: .LBB247_340: +; RV32-NEXT: bne a1, a5, .LBB247_494 +; RV32-NEXT: .LBB247_341: +; RV32-NEXT: bltu a5, a1, .LBB247_343 +; RV32-NEXT: .LBB247_342: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB247_343: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: addi a5, zero, 232 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: lw a5, 308(a3) # 4-byte Folded Reload +; RV32-NEXT: bgeu a5, a1, .LBB247_495 +; RV32-NEXT: # %bb.344: +; RV32-NEXT: bgeu a6, a0, .LBB247_496 +; RV32-NEXT: .LBB247_345: +; RV32-NEXT: beq a1, a5, .LBB247_347 +; RV32-NEXT: .LBB247_346: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB247_347: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: lw a3, 324(a2) # 4-byte Folded Reload +; RV32-NEXT: bgeu a5, a1, .LBB247_497 +; RV32-NEXT: # %bb.348: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu s0, a1, .LBB247_498 +; RV32-NEXT: .LBB247_349: +; RV32-NEXT: bgeu t0, a0, .LBB247_499 +; RV32-NEXT: .LBB247_350: +; RV32-NEXT: bne a1, s0, .LBB247_500 +; RV32-NEXT: .LBB247_351: +; RV32-NEXT: bgeu s0, a1, .LBB247_501 +; RV32-NEXT: .LBB247_352: +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a3, a1, .LBB247_502 +; RV32-NEXT: .LBB247_353: +; RV32-NEXT: bgeu a4, a0, .LBB247_503 +; RV32-NEXT: .LBB247_354: +; RV32-NEXT: bne a1, a3, .LBB247_504 +; RV32-NEXT: .LBB247_355: +; RV32-NEXT: bltu a3, a1, .LBB247_357 +; RV32-NEXT: .LBB247_356: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: .LBB247_357: +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: addi a3, zero, 232 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: lw s11, 428(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 432(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 436(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 440(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 444(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 448(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 452(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 456(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 460(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 464(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 468(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 472(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 476(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 480 +; RV32-NEXT: ret +; RV32-NEXT: .LBB247_358: +; RV32-NEXT: mv s4, t6 +; RV32-NEXT: bgeu t6, a0, .LBB247_359 +; RV32-NEXT: j .LBB247_2 +; RV32-NEXT: .LBB247_359: +; RV32-NEXT: mv a0, t6 +; RV32-NEXT: bne a1, a3, .LBB247_360 +; RV32-NEXT: j .LBB247_3 +; RV32-NEXT: .LBB247_360: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: bgeu a3, a1, .LBB247_361 +; RV32-NEXT: j .LBB247_4 +; RV32-NEXT: .LBB247_361: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: bgeu a4, a1, .LBB247_362 +; RV32-NEXT: j .LBB247_5 +; RV32-NEXT: .LBB247_362: +; RV32-NEXT: mv a3, t4 +; RV32-NEXT: bgeu t4, a0, .LBB247_363 +; RV32-NEXT: j .LBB247_6 +; RV32-NEXT: .LBB247_363: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: beq a1, a4, .LBB247_505 +; RV32-NEXT: j .LBB247_7 +; RV32-NEXT: .LBB247_505: +; RV32-NEXT: j .LBB247_8 +; RV32-NEXT: .LBB247_364: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bgeu s10, a1, .LBB247_365 +; RV32-NEXT: 
j .LBB247_10 +; RV32-NEXT: .LBB247_365: +; RV32-NEXT: mv a4, a7 +; RV32-NEXT: bgeu a7, a0, .LBB247_366 +; RV32-NEXT: j .LBB247_11 +; RV32-NEXT: .LBB247_366: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, s10, .LBB247_367 +; RV32-NEXT: j .LBB247_12 +; RV32-NEXT: .LBB247_367: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu s10, a1, .LBB247_368 +; RV32-NEXT: j .LBB247_13 +; RV32-NEXT: .LBB247_368: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu s3, a1, .LBB247_506 +; RV32-NEXT: j .LBB247_14 +; RV32-NEXT: .LBB247_506: +; RV32-NEXT: j .LBB247_15 +; RV32-NEXT: .LBB247_369: +; RV32-NEXT: mv a0, s6 +; RV32-NEXT: bne a1, s3, .LBB247_370 +; RV32-NEXT: j .LBB247_17 +; RV32-NEXT: .LBB247_370: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu s3, a1, .LBB247_371 +; RV32-NEXT: j .LBB247_18 +; RV32-NEXT: .LBB247_371: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bgeu a5, a1, .LBB247_372 +; RV32-NEXT: j .LBB247_19 +; RV32-NEXT: .LBB247_372: +; RV32-NEXT: mv a4, t5 +; RV32-NEXT: bgeu t5, a0, .LBB247_373 +; RV32-NEXT: j .LBB247_20 +; RV32-NEXT: .LBB247_373: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB247_374 +; RV32-NEXT: j .LBB247_21 +; RV32-NEXT: .LBB247_374: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu a5, a1, .LBB247_375 +; RV32-NEXT: j .LBB247_22 +; RV32-NEXT: .LBB247_375: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bgeu t1, a1, .LBB247_376 +; RV32-NEXT: j .LBB247_23 +; RV32-NEXT: .LBB247_376: +; RV32-NEXT: mv a4, s5 +; RV32-NEXT: bgeu s5, a0, .LBB247_377 +; RV32-NEXT: j .LBB247_24 +; RV32-NEXT: .LBB247_377: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t1, .LBB247_507 +; RV32-NEXT: j .LBB247_25 +; RV32-NEXT: .LBB247_507: +; RV32-NEXT: j .LBB247_26 +; RV32-NEXT: .LBB247_378: +; RV32-NEXT: mv a0, ra +; RV32-NEXT: bne a1, s1, .LBB247_379 +; RV32-NEXT: j .LBB247_32 +; RV32-NEXT: .LBB247_379: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bgeu s1, a1, .LBB247_380 +; RV32-NEXT: j .LBB247_33 +; RV32-NEXT: .LBB247_380: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu s11, a1, .LBB247_508 +; RV32-NEXT: j .LBB247_34 +; RV32-NEXT: .LBB247_508: +; RV32-NEXT: j .LBB247_35 +; RV32-NEXT: .LBB247_381: +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: bne a1, t3, .LBB247_382 +; RV32-NEXT: j .LBB247_45 +; RV32-NEXT: .LBB247_382: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bltu t3, a1, .LBB247_509 +; RV32-NEXT: j .LBB247_46 +; RV32-NEXT: .LBB247_509: +; RV32-NEXT: j .LBB247_47 +; RV32-NEXT: .LBB247_383: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, a6, .LBB247_384 +; RV32-NEXT: j .LBB247_59 +; RV32-NEXT: .LBB247_384: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: bgeu a6, a1, .LBB247_385 +; RV32-NEXT: j .LBB247_60 +; RV32-NEXT: .LBB247_385: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: bltu t2, a1, .LBB247_510 +; RV32-NEXT: j .LBB247_61 +; RV32-NEXT: .LBB247_510: +; RV32-NEXT: j .LBB247_62 +; RV32-NEXT: .LBB247_386: +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: bne a1, t2, .LBB247_387 +; RV32-NEXT: j .LBB247_64 +; RV32-NEXT: .LBB247_387: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bltu t2, a1, .LBB247_511 +; RV32-NEXT: j .LBB247_65 +; RV32-NEXT: .LBB247_511: +; RV32-NEXT: j .LBB247_66 +; RV32-NEXT: .LBB247_388: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bgeu a5, a0, .LBB247_389 +; RV32-NEXT: j .LBB247_68 +; RV32-NEXT: .LBB247_389: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bne a1, s1, .LBB247_390 +; RV32-NEXT: j .LBB247_69 +; RV32-NEXT: .LBB247_390: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu s1, a1, .LBB247_391 +; RV32-NEXT: j .LBB247_70 +; RV32-NEXT: .LBB247_391: +; RV32-NEXT: mv a1, s1 +; 
RV32-NEXT: mv a5, a0 +; RV32-NEXT: bltu s0, a1, .LBB247_512 +; RV32-NEXT: j .LBB247_71 +; RV32-NEXT: .LBB247_512: +; RV32-NEXT: j .LBB247_72 +; RV32-NEXT: .LBB247_392: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, s0, .LBB247_393 +; RV32-NEXT: j .LBB247_74 +; RV32-NEXT: .LBB247_393: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bgeu s0, a1, .LBB247_394 +; RV32-NEXT: j .LBB247_75 +; RV32-NEXT: .LBB247_394: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: bltu s1, a1, .LBB247_513 +; RV32-NEXT: j .LBB247_76 +; RV32-NEXT: .LBB247_513: +; RV32-NEXT: j .LBB247_77 +; RV32-NEXT: .LBB247_395: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beq a1, s1, .LBB247_79 +; RV32-NEXT: .LBB247_396: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: bltu s1, a1, .LBB247_80 +; RV32-NEXT: .LBB247_397: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu t5, a1, .LBB247_81 +; RV32-NEXT: j .LBB247_82 +; RV32-NEXT: .LBB247_398: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t5, .LBB247_84 +; RV32-NEXT: .LBB247_399: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu t5, a1, .LBB247_85 +; RV32-NEXT: j .LBB247_86 +; RV32-NEXT: .LBB247_400: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a5, a1, .LBB247_94 +; RV32-NEXT: .LBB247_401: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a3, a0, .LBB247_95 +; RV32-NEXT: .LBB247_402: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: bne a1, a5, .LBB247_96 +; RV32-NEXT: j .LBB247_97 +; RV32-NEXT: .LBB247_403: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a4, a1, .LBB247_99 +; RV32-NEXT: .LBB247_404: +; RV32-NEXT: mv a2, a6 +; RV32-NEXT: bltu a6, a0, .LBB247_100 +; RV32-NEXT: .LBB247_405: +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: bne a1, a4, .LBB247_101 +; RV32-NEXT: j .LBB247_102 +; RV32-NEXT: .LBB247_406: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu s2, a1, .LBB247_104 +; RV32-NEXT: .LBB247_407: +; RV32-NEXT: mv a2, s7 +; RV32-NEXT: bltu s7, a0, .LBB247_105 +; RV32-NEXT: .LBB247_408: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s2, .LBB247_106 +; RV32-NEXT: .LBB247_409: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu s2, a1, .LBB247_107 +; RV32-NEXT: .LBB247_410: +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu s6, a1, .LBB247_108 +; RV32-NEXT: j .LBB247_109 +; RV32-NEXT: .LBB247_411: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, s6, .LBB247_111 +; RV32-NEXT: .LBB247_412: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu s6, a1, .LBB247_112 +; RV32-NEXT: j .LBB247_113 +; RV32-NEXT: .LBB247_413: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu s3, a1, .LBB247_121 +; RV32-NEXT: .LBB247_414: +; RV32-NEXT: mv a2, s11 +; RV32-NEXT: bltu s11, a0, .LBB247_122 +; RV32-NEXT: .LBB247_415: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s3, .LBB247_123 +; RV32-NEXT: .LBB247_416: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu s3, a1, .LBB247_124 +; RV32-NEXT: .LBB247_417: +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu s10, a1, .LBB247_125 +; RV32-NEXT: j .LBB247_126 +; RV32-NEXT: .LBB247_418: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, s10, .LBB247_128 +; RV32-NEXT: .LBB247_419: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu s10, a1, .LBB247_129 +; RV32-NEXT: .LBB247_420: +; RV32-NEXT: mv a1, s10 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu t1, a1, .LBB247_130 +; RV32-NEXT: j .LBB247_131 +; RV32-NEXT: .LBB247_421: +; RV32-NEXT: mv a0, s10 +; RV32-NEXT: beq a1, t1, .LBB247_133 +; RV32-NEXT: .LBB247_422: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu t1, a1, .LBB247_134 +; RV32-NEXT: .LBB247_423: +; RV32-NEXT: mv a1, t1 +; 
RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu s5, a1, .LBB247_135 +; RV32-NEXT: j .LBB247_136 +; RV32-NEXT: .LBB247_424: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s5, .LBB247_138 +; RV32-NEXT: .LBB247_425: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv t1, t2 +; RV32-NEXT: bltu s5, a1, .LBB247_139 +; RV32-NEXT: .LBB247_426: +; RV32-NEXT: mv a1, s5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu t6, a1, .LBB247_140 +; RV32-NEXT: j .LBB247_141 +; RV32-NEXT: .LBB247_427: +; RV32-NEXT: mv a0, s5 +; RV32-NEXT: beq a1, t6, .LBB247_143 +; RV32-NEXT: .LBB247_428: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu t6, a1, .LBB247_144 +; RV32-NEXT: j .LBB247_145 +; RV32-NEXT: .LBB247_429: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu s8, a1, .LBB247_153 +; RV32-NEXT: .LBB247_430: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a3, a0, .LBB247_154 +; RV32-NEXT: .LBB247_431: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, s8, .LBB247_155 +; RV32-NEXT: .LBB247_432: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu s8, a1, .LBB247_156 +; RV32-NEXT: .LBB247_433: +; RV32-NEXT: mv a1, s8 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu t3, a1, .LBB247_157 +; RV32-NEXT: j .LBB247_158 +; RV32-NEXT: .LBB247_434: +; RV32-NEXT: mv a0, s9 +; RV32-NEXT: beq a1, t3, .LBB247_160 +; RV32-NEXT: .LBB247_435: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu t3, a1, .LBB247_161 +; RV32-NEXT: j .LBB247_162 +; RV32-NEXT: .LBB247_436: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a7, a1, .LBB247_170 +; RV32-NEXT: .LBB247_437: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a5, a0, .LBB247_171 +; RV32-NEXT: .LBB247_438: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: bne a1, a7, .LBB247_172 +; RV32-NEXT: j .LBB247_173 +; RV32-NEXT: .LBB247_439: +; RV32-NEXT: mv a0, s8 +; RV32-NEXT: beq a1, s1, .LBB247_179 +; RV32-NEXT: .LBB247_440: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu s1, a1, .LBB247_180 +; RV32-NEXT: .LBB247_441: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu s4, a1, .LBB247_181 +; RV32-NEXT: j .LBB247_182 +; RV32-NEXT: .LBB247_442: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: beq a1, s4, .LBB247_184 +; RV32-NEXT: .LBB247_443: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu s4, a1, .LBB247_185 +; RV32-NEXT: j .LBB247_186 +; RV32-NEXT: .LBB247_444: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu t0, a1, .LBB247_194 +; RV32-NEXT: .LBB247_445: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a4, a0, .LBB247_195 +; RV32-NEXT: .LBB247_446: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, t0, .LBB247_196 +; RV32-NEXT: j .LBB247_197 +; RV32-NEXT: .LBB247_447: +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: beq a1, a5, .LBB247_211 +; RV32-NEXT: .LBB247_448: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a5, a1, .LBB247_212 +; RV32-NEXT: j .LBB247_213 +; RV32-NEXT: .LBB247_449: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu ra, a1, .LBB247_221 +; RV32-NEXT: .LBB247_450: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a4, a0, .LBB247_222 +; RV32-NEXT: .LBB247_451: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: bne a1, ra, .LBB247_223 +; RV32-NEXT: j .LBB247_224 +; RV32-NEXT: .LBB247_452: +; RV32-NEXT: mv a0, s7 +; RV32-NEXT: beq a1, a4, .LBB247_230 +; RV32-NEXT: .LBB247_453: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a4, a1, .LBB247_231 +; RV32-NEXT: j .LBB247_232 +; RV32-NEXT: .LBB247_454: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a3, a1, .LBB247_240 +; RV32-NEXT: .LBB247_455: +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: bltu s0, a0, .LBB247_241 +; RV32-NEXT: .LBB247_456: +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: beq a1, 
a3, .LBB247_242 +; RV32-NEXT: .LBB247_457: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a3, a1, .LBB247_243 +; RV32-NEXT: j .LBB247_244 +; RV32-NEXT: .LBB247_458: +; RV32-NEXT: mv a0, s11 +; RV32-NEXT: beq a1, a3, .LBB247_248 +; RV32-NEXT: .LBB247_459: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu a3, a1, .LBB247_249 +; RV32-NEXT: j .LBB247_250 +; RV32-NEXT: .LBB247_460: +; RV32-NEXT: mv a2, t0 +; RV32-NEXT: bltu t0, a0, .LBB247_260 +; RV32-NEXT: .LBB247_461: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, s1, .LBB247_261 +; RV32-NEXT: .LBB247_462: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu s1, a1, .LBB247_262 +; RV32-NEXT: .LBB247_463: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu t1, a1, .LBB247_263 +; RV32-NEXT: j .LBB247_264 +; RV32-NEXT: .LBB247_464: +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: beq a1, t1, .LBB247_266 +; RV32-NEXT: .LBB247_465: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgeu t1, a1, .LBB247_267 +; RV32-NEXT: j .LBB247_268 +; RV32-NEXT: .LBB247_466: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a7, a1, .LBB247_276 +; RV32-NEXT: .LBB247_467: +; RV32-NEXT: mv a2, a5 +; RV32-NEXT: bltu a5, a0, .LBB247_277 +; RV32-NEXT: .LBB247_468: +; RV32-NEXT: mv a0, a5 +; RV32-NEXT: beq a1, a7, .LBB247_278 +; RV32-NEXT: .LBB247_469: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a7, a1, .LBB247_279 +; RV32-NEXT: .LBB247_470: +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a4, a1, .LBB247_280 +; RV32-NEXT: j .LBB247_281 +; RV32-NEXT: .LBB247_471: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu s1, a1, .LBB247_295 +; RV32-NEXT: .LBB247_472: +; RV32-NEXT: mv a2, t4 +; RV32-NEXT: bltu t4, a0, .LBB247_296 +; RV32-NEXT: .LBB247_473: +; RV32-NEXT: mv a0, t4 +; RV32-NEXT: bne a1, s1, .LBB247_297 +; RV32-NEXT: j .LBB247_298 +; RV32-NEXT: .LBB247_474: +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a6, a1, .LBB247_300 +; RV32-NEXT: .LBB247_475: +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: bltu a3, a0, .LBB247_301 +; RV32-NEXT: .LBB247_476: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: beq a1, a6, .LBB247_302 +; RV32-NEXT: .LBB247_477: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu a6, a1, .LBB247_303 +; RV32-NEXT: .LBB247_478: +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bgeu a4, a1, .LBB247_304 +; RV32-NEXT: j .LBB247_305 +; RV32-NEXT: .LBB247_479: +; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu s0, a1, .LBB247_311 +; RV32-NEXT: .LBB247_480: +; RV32-NEXT: mv a2, t1 +; RV32-NEXT: bltu t1, a0, .LBB247_312 +; RV32-NEXT: .LBB247_481: +; RV32-NEXT: mv a0, t1 +; RV32-NEXT: beq a1, s0, .LBB247_313 +; RV32-NEXT: .LBB247_482: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bltu s0, a1, .LBB247_314 +; RV32-NEXT: .LBB247_483: +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a5, a1, .LBB247_315 +; RV32-NEXT: .LBB247_484: +; RV32-NEXT: mv a2, t5 +; RV32-NEXT: bltu t5, a0, .LBB247_316 +; RV32-NEXT: .LBB247_485: +; RV32-NEXT: mv a0, t5 +; RV32-NEXT: bne a1, a5, .LBB247_317 +; RV32-NEXT: j .LBB247_318 +; RV32-NEXT: .LBB247_486: +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a4, a1, .LBB247_320 +; RV32-NEXT: .LBB247_487: +; RV32-NEXT: mv a2, a7 +; RV32-NEXT: bltu a7, a0, .LBB247_321 +; RV32-NEXT: .LBB247_488: +; RV32-NEXT: mv a0, a7 +; RV32-NEXT: bne a1, a4, .LBB247_322 +; RV32-NEXT: j .LBB247_323 +; RV32-NEXT: .LBB247_489: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: bltu a4, a0, .LBB247_327 +; RV32-NEXT: .LBB247_490: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: beq a1, a3, .LBB247_328 +; RV32-NEXT: 
.LBB247_491:
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: bgeu a3, a1, .LBB247_329
+; RV32-NEXT: j .LBB247_330
+; RV32-NEXT: .LBB247_492:
+; RV32-NEXT: mv a2, s1
+; RV32-NEXT: bltu s1, a0, .LBB247_340
+; RV32-NEXT: .LBB247_493:
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: beq a1, a5, .LBB247_341
+; RV32-NEXT: .LBB247_494:
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: bgeu a5, a1, .LBB247_342
+; RV32-NEXT: j .LBB247_343
+; RV32-NEXT: .LBB247_495:
+; RV32-NEXT: mv a2, a6
+; RV32-NEXT: bltu a6, a0, .LBB247_345
+; RV32-NEXT: .LBB247_496:
+; RV32-NEXT: mv a0, a6
+; RV32-NEXT: bne a1, a5, .LBB247_346
+; RV32-NEXT: j .LBB247_347
+; RV32-NEXT: .LBB247_497:
+; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: bltu s0, a1, .LBB247_349
+; RV32-NEXT: .LBB247_498:
+; RV32-NEXT: mv a2, t0
+; RV32-NEXT: bltu t0, a0, .LBB247_350
+; RV32-NEXT: .LBB247_499:
+; RV32-NEXT: mv a0, t0
+; RV32-NEXT: beq a1, s0, .LBB247_351
+; RV32-NEXT: .LBB247_500:
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: bltu s0, a1, .LBB247_352
+; RV32-NEXT: .LBB247_501:
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: bltu a3, a1, .LBB247_353
+; RV32-NEXT: .LBB247_502:
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: bltu a4, a0, .LBB247_354
+; RV32-NEXT: .LBB247_503:
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: beq a1, a3, .LBB247_355
+; RV32-NEXT: .LBB247_504:
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: bgeu a3, a1, .LBB247_356
+; RV32-NEXT: j .LBB247_357
+;
+; RV64-LABEL: vreduce_umax_v64i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a1, a0, 384
+; RV64-NEXT: vle64.v v16, (a1)
+; RV64-NEXT: addi a1, a0, 256
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vle64.v v24, (a0)
+; RV64-NEXT: vle64.v v0, (a1)
+; RV64-NEXT: vmaxu.vv v16, v24, v16
+; RV64-NEXT: vmaxu.vv v8, v8, v0
+; RV64-NEXT: vmaxu.vv v8, v8, v16
+; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.i v25, 0
+; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV64-NEXT: vredmaxu.vs v25, v8, v25
+; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: ret
+ %v = load <64 x i64>, <64 x i64>* %x
+ %red = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> %v)
+ ret i64 %red
+}