diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -949,6 +949,7 @@ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_MUL, VT, Custom); for (unsigned VPOpc : IntegerVPOps) setOperationAction(VPOpc, VT, Custom); @@ -3503,6 +3504,7 @@ case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_MUL: return lowerVECREDUCE(Op, DAG); case ISD::VECREDUCE_AND: case ISD::VECREDUCE_OR: @@ -5117,6 +5119,24 @@ MVT VecVT = VecEVT.getSimpleVT(); MVT VecEltVT = VecVT.getVectorElementType(); + MVT XLenVT = Subtarget.getXLenVT(); + + // There is no corresponding instruction in RVV, but we can lower it manually + // to avoid redundant computation. + if (BaseOpc == ISD::MUL) { + if (VecEVT.getSimpleVT().isScalableVector()) + return SDValue(); + while (VecEVT.getSimpleVT().getVectorNumElements() != 1) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL); + VecEVT = Lo.getValueType(); + Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); + } + SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec, + DAG.getConstant(0, DL, XLenVT)); + return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); + } + unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); MVT ContainerVT = VecVT; @@ -5126,7 +5146,6 @@ } MVT M1VT = getLMUL1VT(ContainerVT); - MVT XLenVT = Subtarget.getXLenVT(); SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); @@ -6944,6 +6963,7 @@ case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_MUL: if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) Results.push_back(V); break; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -124,7 +124,6 @@ default: return false; // These reductions have no equivalent in RVV - case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_fmul: return true; } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -6808,8 +6808,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: lb a0, 1(a0) -; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -6824,9 +6825,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6842,11 +6846,16 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli
zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6862,13 +6871,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6885,16 +6901,25 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v10, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vslidedown.vi v10, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vslidedown.vi v10, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vslidedown.vi v10, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vrgather.vi v10, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -6911,18 +6936,29 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vslidedown.vx v12, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 16 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vrgather.vi v12, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; 
CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -6939,21 +6975,34 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 16 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vi v16, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v12, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -6973,21 +7022,34 @@ ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 16 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vi v16, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v12, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 16, e8, m2, 
ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -7016,8 +7078,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lh a0, 2(a0) -; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -7032,9 +7095,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -7050,11 +7116,16 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -7070,14 +7141,21 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vslidedown.vi v10, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vslidedown.vi v10, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vrgather.vi v10, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -7093,16 +7171,25 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, 
e16, m4, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 16, e16, m4, ta, mu ; CHECK-NEXT: vslidedown.vi v12, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vrgather.vi v12, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 8, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -7119,18 +7206,29 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 16 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vi v16, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vsetivli zero, 16, e16, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 8, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -7150,18 +7248,29 @@ ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 16 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vi v16, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vsetivli zero, 16, e16, m4, ta, mu +; CHECK-NEXT: 
vslidedown.vi v12, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 8, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -7190,8 +7299,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lw a0, 4(a0) -; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -7206,9 +7316,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -7224,12 +7337,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vslidedown.vi v10, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v10 -; CHECK-NEXT: vrgather.vi v10, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -7244,14 +7362,21 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, mu ; CHECK-NEXT: vslidedown.vi v12, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vslidedown.vi v12, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vrgather.vi v12, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; 
CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -7267,16 +7392,25 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, mu ; CHECK-NEXT: vslidedown.vi v16, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vi v16, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -7295,16 +7429,25 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, mu ; CHECK-NEXT: vslidedown.vi v16, v8, 16 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 8 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 4 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vslidedown.vi v16, v8, 2 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vi v16, v8, 1 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -7343,12 +7486,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vslidedown.vi v9, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret @@ -7357,8 +7499,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: ld a0, 8(a0) -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <2 x i64>, <2 x i64>* %x @@ 
-7373,13 +7516,15 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetivli zero, 2, e64, m2, ta, mu ; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: vrgather.vi v10, v8, 1 -; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, mu ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret @@ -7388,10 +7533,13 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 2, e64, m2, ta, mu ; RV64-NEXT: vslidedown.vi v10, v8, 2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmul.vv v8, v8, v10 -; RV64-NEXT: vrgather.vi v10, v8, 1 -; RV64-NEXT: vmul.vv v8, v8, v10 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -7406,15 +7554,19 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetivli zero, 4, e64, m4, ta, mu ; RV32-NEXT: vslidedown.vi v12, v8, 4 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: vslidedown.vi v12, v8, 2 -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: vrgather.vi v12, v8, 1 -; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vsetivli zero, 2, e64, m2, ta, mu +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, mu ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret @@ -7423,12 +7575,17 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 4, e64, m4, ta, mu ; RV64-NEXT: vslidedown.vi v12, v8, 4 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: vmul.vv v8, v8, v12 -; RV64-NEXT: vslidedown.vi v12, v8, 2 -; RV64-NEXT: vmul.vv v8, v8, v12 -; RV64-NEXT: vrgather.vi v12, v8, 1 -; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: vsetivli zero, 2, e64, m2, ta, mu +; RV64-NEXT: vslidedown.vi v10, v8, 2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmul.vv v8, v8, v10 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -7443,17 +7600,23 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetivli zero, 8, e64, m8, ta, mu ; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 4 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 2 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vrgather.vi v16, v8, 1 -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsetivli zero, 4, e64, m4, ta, mu +; RV32-NEXT: vslidedown.vi v12, v8, 4 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmul.vv 
v8, v8, v12 +; RV32-NEXT: vsetivli zero, 2, e64, m2, ta, mu +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, mu ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret @@ -7462,14 +7625,21 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, mu ; RV64-NEXT: vslidedown.vi v16, v8, 8 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vslidedown.vi v16, v8, 4 -; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vslidedown.vi v16, v8, 2 -; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vrgather.vi v16, v8, 1 -; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vsetivli zero, 4, e64, m4, ta, mu +; RV64-NEXT: vslidedown.vi v12, v8, 4 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: vsetivli zero, 2, e64, m2, ta, mu +; RV64-NEXT: vslidedown.vi v10, v8, 2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmul.vv v8, v8, v10 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -7487,18 +7657,24 @@ ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsetivli zero, 8, e64, m8, ta, mu ; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 4 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 2 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vrgather.vi v16, v8, 1 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, mu +; RV32-NEXT: vsetivli zero, 4, e64, m4, ta, mu +; RV32-NEXT: vslidedown.vi v12, v8, 4 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vsetivli zero, 2, e64, m2, ta, mu +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, mu -; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; @@ -7509,14 +7685,21 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, mu ; RV64-NEXT: vslidedown.vi v16, v8, 8 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vslidedown.vi v16, v8, 4 -; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vslidedown.vi v16, v8, 2 -; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vrgather.vi v16, v8, 1 -; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vsetivli zero, 4, e64, m4, ta, mu +; RV64-NEXT: vslidedown.vi v12, v8, 4 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: vsetivli zero, 2, e64, m2, ta, mu +; RV64-NEXT: vslidedown.vi v10, v8, 2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; 
RV64-NEXT: vmul.vv v8, v8, v10 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -7529,30 +7712,1533 @@ define i64 @vreduce_mul_v64i64(<64 x i64>* %x) nounwind { ; RV32-LABEL: vreduce_mul_v64i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) -; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: vle64.v v0, (a1) -; RV32-NEXT: vmul.vv v16, v24, v16 -; RV32-NEXT: vmul.vv v8, v8, v0 -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, -384 +; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 372(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 368(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 364(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 360(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 356(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 352(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 232 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: addi a0, a0, 384 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 296(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 292(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 288(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 284(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 280(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 276(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 272(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 268(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 264(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 260(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 256(sp) # 4-byte 
Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 252(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 248(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 244(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 240(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 236(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 232(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 228(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 224(sp) # 4-byte Folded Spill ; RV32-NEXT: vslidedown.vi v16, v8, 8 -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 220(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 216(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 212(sp) # 4-byte Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 224 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vslidedown.vi v16, v8, 4 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 2 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vrgather.vi v16, v8, 1 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, mu +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 208 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 216 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 224 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 204(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 208 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 208(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 216 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 200(sp) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a0, v24 +; RV32-NEXT: sw a0, 196(sp) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 192(sp) # 4-byte Folded Spill ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: sw a0, 188(sp) # 4-byte Folded Spill +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-NEXT: 
addi a0, a3, 256 +; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, mu -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 224 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 216 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 208 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 200 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 192 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 184 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 176 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 168 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 160 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 152 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 144 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 136 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 120 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi 
v16, v8, 17 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 112 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 104 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 96 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 88 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 80 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: addi a0, sp, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 72 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 224 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add 
a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 180(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 216 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 184(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 208 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 176(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 200 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 172(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 192 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 168(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 184 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 164(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 176 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 160(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 168 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 156(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 160 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 152(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 152 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 148(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 144 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 144(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 136 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 140(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 136(sp) # 4-byte 
Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 120 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 132(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 112 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 128(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 104 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 96 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 88 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 116(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 80 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 112(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 108(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 104(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 100(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 96(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 92(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: 
vmv.x.s s11, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32-NEXT: addi a0, sp, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s9, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 72 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s s8, v24 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s s6, v8 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-NEXT: addi a0, a3, 128 +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 224 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 216 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 208 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 200 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 192 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 184 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 176 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 168 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 160 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 152 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 144 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 136 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 120 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 112 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 104 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 96 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 88 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 80 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 72 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; 
RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: addi a0, sp, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 224 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 216 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 208 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 200 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s4, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 192 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 184 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 176 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 168 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 160 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 152 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 144 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 
52(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 136 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s0, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 120 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t6, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 112 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 104 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t5, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 96 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 88 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t4, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 80 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 72 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t3, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t2, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t1, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 24(sp) # 4-byte Folded 
Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s t0, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: addi a0, sp, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s a7, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 320 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmv.x.s s10, v0 +; RV32-NEXT: vmv.x.s a6, v24 +; RV32-NEXT: vmv.x.s s7, v16 +; RV32-NEXT: vmv.x.s a5, v8 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-NEXT: vle32.v v8, (a3) +; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: vslidedown.vi v24, v8, 5 +; RV32-NEXT: vslidedown.vi v0, v8, 2 +; RV32-NEXT: vmv.x.s a2, v24 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: mul a3, a0, a1 +; RV32-NEXT: mul a4, a3, a2 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mulhu ra, a3, a2 +; RV32-NEXT: add a4, ra, a4 +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vmv.x.s ra, v16 +; RV32-NEXT: mul ra, a0, ra +; RV32-NEXT: mulhu a0, a0, a1 +; RV32-NEXT: add a0, a0, ra +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: vmv.x.s ra, v16 +; RV32-NEXT: mul a1, ra, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, a4, a0 +; RV32-NEXT: mul a1, a3, a2 +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vslidedown.vi v24, v8, 6 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 9 +; RV32-NEXT: vslidedown.vi v24, v8, 8 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 11 +; RV32-NEXT: vslidedown.vi v24, v8, 10 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 13 +; RV32-NEXT: vslidedown.vi v24, v8, 12 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 15 +; RV32-NEXT: vslidedown.vi v24, v8, 14 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 17 +; RV32-NEXT: vslidedown.vi v24, v8, 16 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; 
RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 19 +; RV32-NEXT: vslidedown.vi v24, v8, 18 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 21 +; RV32-NEXT: vslidedown.vi v24, v8, 20 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 23 +; RV32-NEXT: vslidedown.vi v24, v8, 22 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 25 +; RV32-NEXT: vslidedown.vi v24, v8, 24 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 27 +; RV32-NEXT: vslidedown.vi v24, v8, 26 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 29 +; RV32-NEXT: vslidedown.vi v24, v8, 28 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v24 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: vslidedown.vi v16, v8, 31 +; RV32-NEXT: vslidedown.vi v8, v8, 30 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: vmv.x.s a3, v8 +; RV32-NEXT: mulhu a4, a1, a3 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: mul a2, a1, s7 +; RV32-NEXT: mulhu a3, a1, a5 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a5 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a5 +; RV32-NEXT: mul a2, a1, s10 +; RV32-NEXT: mulhu a3, a1, a6 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a6 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a6 +; RV32-NEXT: lw a2, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, a7 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a7 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a7 +; RV32-NEXT: lw a2, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, t0 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, t0 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, t0 +; RV32-NEXT: lw a2, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, t1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, t1 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, t1 +; RV32-NEXT: lw a2, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, t2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, t2 +; RV32-NEXT: add a0, a2, a0 
+; RV32-NEXT: mul a1, a1, t2 +; RV32-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, t3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, t3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, t3 +; RV32-NEXT: lw a2, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, t4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, t4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, t4 +; RV32-NEXT: lw a2, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, t5 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, t5 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, t5 +; RV32-NEXT: lw a2, 48(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, t6 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, t6 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, t6 +; RV32-NEXT: lw a2, 52(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s0 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s0 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s0 +; RV32-NEXT: lw a2, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s1 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s1 +; RV32-NEXT: lw a2, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s2 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s2 +; RV32-NEXT: lw a2, 64(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s3 +; RV32-NEXT: lw a2, 68(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s4 +; RV32-NEXT: lw a2, 72(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s5 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s5 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s5 +; RV32-NEXT: lw a2, 76(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s6 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s6 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s6 +; RV32-NEXT: lw a2, 80(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s8 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s8 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s8 +; RV32-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s9 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s9 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s9 +; RV32-NEXT: lw a2, 88(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: mulhu a3, a1, s11 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, s11 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, s11 +; RV32-NEXT: lw a2, 96(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 104(sp) # 4-byte Folded Reload +; RV32-NEXT: 
mul a2, a1, a2 +; RV32-NEXT: lw a4, 100(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 112(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 108(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 116(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 128(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 136(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 132(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 144(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 152(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 148(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 160(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 156(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 168(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 164(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 176(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 172(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 180(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 184(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 192(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 188(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 200(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 196(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; 
RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 204(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 208(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 216(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 212(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 224(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 220(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 232(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 228(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 240(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 236(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 248(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 244(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 256(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 252(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 264(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 260(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 272(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 268(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 280(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 276(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 288(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 284(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 296(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 292(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: lw a2, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: mul a2, a1, a2 +; RV32-NEXT: lw a4, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: mulhu a3, a1, a4 +; 
RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: mul a2, a1, a4
+; RV32-NEXT: lw a1, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT: mul a1, a2, a1
+; RV32-NEXT: lw a4, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT: mulhu a3, a2, a4
+; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: mul a0, a0, a4
+; RV32-NEXT: add a1, a1, a0
+; RV32-NEXT: mul a0, a2, a4
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 232
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add sp, sp, a2
+; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 372(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 368(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 364(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s4, 360(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s5, 356(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s6, 352(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s7, 348(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s8, 344(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s9, 340(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s10, 336(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s11, 332(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 384
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_mul_v64i64:
@@ -7568,14 +9254,21 @@
; RV64-NEXT: vmul.vv v16, v24, v16
; RV64-NEXT: vmul.vv v8, v8, v0
; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, mu
; RV64-NEXT: vslidedown.vi v16, v8, 8
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vmul.vv v8, v8, v16
-; RV64-NEXT: vslidedown.vi v16, v8, 4
-; RV64-NEXT: vmul.vv v8, v8, v16
-; RV64-NEXT: vslidedown.vi v16, v8, 2
-; RV64-NEXT: vmul.vv v8, v8, v16
-; RV64-NEXT: vrgather.vi v16, v8, 1
-; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vsetivli zero, 4, e64, m4, ta, mu
+; RV64-NEXT: vslidedown.vi v12, v8, 4
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT: vmul.vv v8, v8, v12
+; RV64-NEXT: vsetivli zero, 2, e64, m2, ta, mu
+; RV64-NEXT: vslidedown.vi v10, v8, 2
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; RV64-NEXT: vmul.vv v8, v8, v10
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
%v = load <64 x i64>, <64 x i64>* %x