diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2800,6 +2800,52 @@
   return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
 }
 
+static MVT getLMUL1VT(MVT VT) {
+  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
+         "Unexpected vector MVT");
+  return MVT::getScalableVectorVT(
+      VT.getVectorElementType(),
+      RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
+}
+
+// This function lowers an insert of a scalar operand Scalar into lane
+// 0 of the vector regardless of the value of VL. The contents of the
+// remaining lanes of the result vector are unspecified. VL is assumed
+// to be non-zero.
+static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL,
+                                 MVT VT, SDLoc DL, SelectionDAG &DAG,
+                                 const RISCVSubtarget &Subtarget) {
+  SDValue Passthru = DAG.getUNDEF(VT);
+  if (VT.isFloatingPoint())
+    // TODO: Use vmv.v.i for appropriate constants
+    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+
+  // Avoid the tricky legalization cases by falling back to using the
+  // splat code which already handles it gracefully.
+  const MVT XLenVT = Subtarget.getXLenVT();
+  if (!Scalar.getValueType().bitsLE(XLenVT))
+    return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
+                            DAG.getConstant(1, DL, XLenVT),
+                            VT, DL, DAG, Subtarget);
+
+  // If the operand is a constant, sign extend to increase our chances
+  // of being able to use a .vi instruction. ANY_EXTEND would become a
+  // zero extend and the simm5 check in isel would fail.
+  // FIXME: Should we ignore the upper bits in isel instead?
+  unsigned ExtOpc =
+      isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
+  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
+  // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
+  // higher would involve overly constraining the register allocator for
+  // no purpose.
+  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
+    if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
+        VT.bitsLE(getLMUL1VT(VT)))
+      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
+  }
+  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
+}
+
 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
                                 const RISCVSubtarget &Subtarget) {
   // We need to be able to widen elements to the next larger integer type.
@@ -5095,8 +5141,7 @@
         return Vec;
       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
     }
-    ValInVec =
-        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
+    ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
   } else {
     // On RV32, i64-element vectors must be specially handled to place the
     // value at element 0, by using two vslide1down instructions in sequence on
@@ -5686,14 +5731,6 @@
   return SDValue();
 }
 
-static MVT getLMUL1VT(MVT VT) {
-  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
-         "Unexpected vector MVT");
-  return MVT::getScalableVectorVT(
-      VT.getVectorElementType(),
-      RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
-}
-
 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
   switch (ISDOpcode) {
   default:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -696,9 +696,8 @@
 define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV32NOM-LABEL: extractelt_sdiv_v4i32:
 ; RV32NOM:       # %bb.0:
-; RV32NOM-NEXT:    li a0, -1
 ; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32NOM-NEXT:    vmv.s.x v9, a0
+; RV32NOM-NEXT:    vmv.v.i v9, -1
 ; RV32NOM-NEXT:    vmv.v.i v10, 0
 ; RV32NOM-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
 ; RV32NOM-NEXT:    vslideup.vi v10, v9, 3
@@ -735,9 +734,8 @@
 ;
 ; RV64-LABEL: extractelt_sdiv_v4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, -1
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v9, a0
+; RV64-NEXT:    vmv.v.i v9, -1
 ; RV64-NEXT:    vmv.v.i v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
 ; RV64-NEXT:    vslideup.vi v10, v9, 3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -191,9 +191,8 @@
 define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
 ; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 1
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a0
+; RV32-NEXT:    vmv.v.i v9, 1
 ; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32-NEXT:    vslideup.vi v8, v9, 2
@@ -215,9 +214,8 @@
 define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
 ; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 2
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a0
+; RV32-NEXT:    vmv.v.i v9, 2
 ; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32-NEXT:    vslideup.vi v8, v9, 2
@@ -507,53 +505,28 @@
 }
 
 define void @buildvec_vid_step1o2_v4i32(<4 x i32>* %z0, <4 x i32>* %z1, <4 x i32>* %z2, <4 x i32>* %z3, <4 x i32>* %z4, <4 x i32>* %z5, <4 x i32>* %z6) {
-; RV32-LABEL: buildvec_vid_step1o2_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vid.v v8
-; RV32-NEXT:    vsrl.vi v8, v8, 1
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    vse32.v v8, (a1)
-; RV32-NEXT:    vmv.v.i v9, 1
-; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    vse32.v v8, (a4)
-; RV32-NEXT:    vmv.s.x v8, zero
-; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; RV32-NEXT:    vslideup.vi v9, v8, 1
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vse32.v v9, (a5)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT:    vslideup.vi v9, v8, 3
-; RV32-NEXT:    vse32.v v9, (a6)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_vid_step1o2_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    vsrl.vi v8, v8, 1
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    vmv.v.i v9, 1
-; RV64-NEXT:    vse32.v v8, (a1)
-; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    vse32.v v8, (a4)
-; RV64-NEXT:    vmv.s.x v8, zero
-; RV64-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; RV64-NEXT:    vslideup.vi v9, v8, 1
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vse32.v v9, (a5)
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    vmv.v.i v9, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV64-NEXT:    vslideup.vi v9, v8, 3
-; RV64-NEXT:    vse32.v v9, (a6)
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_vid_step1o2_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsrl.vi v8, v8, 1
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a2)
+; CHECK-NEXT:    vse32.v v8, (a3)
+; CHECK-NEXT:    vse32.v v8, (a4)
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vmv.v.i v9, 1
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vse32.v v10, (a5)
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
+; CHECK-NEXT:    vse32.v v8, (a6)
+; CHECK-NEXT:    ret
   store <4 x i32> , <4 x i32>* %z0
   store <4 x i32> , <4 x i32>* %z1
   store <4 x i32> , <4 x i32>* %z2
@@ -574,23 +547,19 @@
 ; CHECK-NEXT:    vsrl.vi v8, v8, 1
 ; CHECK-NEXT:    vadd.vi v8, v8, 3
 ; CHECK-NEXT:    vse16.v v8, (a0)
-; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    vse16.v v8, (a3)
 ; CHECK-NEXT:    vse16.v v8, (a4)
-; CHECK-NEXT:    li a0, 3
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vmv.v.i v10, 4
+; CHECK-NEXT:    vmv.v.i v8, 3
+; CHECK-NEXT:    vmv.v.i v9, 4
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
+; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vslideup.vi v10, v8, 1
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
 ; CHECK-NEXT:    vse16.v v10, (a5)
-; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 3
-; CHECK-NEXT:    vse16.v v9, (a6)
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
+; CHECK-NEXT:    vse16.v v8, (a6)
 ; CHECK-NEXT:    ret
   store <4 x i16> , <4 x i16>* %z0
   store <4 x i16> , <4 x i16>* %z1
@@ -721,9 +690,8 @@
 define <16 x i8> @buildvec_not_vid_v16i8() {
 ; CHECK-LABEL: buildvec_not_vid_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 3
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vsetivli zero, 7, e8, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v9, 6
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -183,9 +183,8 @@
 define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
 ; RV32-LABEL: vrgather_shuffle_vv_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 5
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v16, a0
+; RV32-NEXT:    vmv.v.i v16, 5
 ; RV32-NEXT:    vmv.v.i v20, 2
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
 ; RV32-NEXT:    vslideup.vi v20, v16, 7
@@ -372,9 +371,8 @@
 define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
 ; CHECK-LABEL: splat_ve4_ins_i1ve3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 3
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vmv.v.i v10, 4
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v10, v9, 1
@@ -473,9 +471,8 @@
 define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 4
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vmv.v.i v10, 4
 ; CHECK-NEXT:    vmv.v.i v11, 0
 ; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v11, v10, 2
@@ -493,9 +490,8 @@
 define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
 ; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 6
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vmv.s.x v10, a0
+; RV32-NEXT:    vmv.v.i v10, 6
 ; RV32-NEXT:    vmv.v.i v11, 0
 ; RV32-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
 ; RV32-NEXT:    vslideup.vi v11, v10, 5
@@ -513,9 +509,8 @@
 ;
 ; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 6
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vmv.s.x v10, a0
+; RV64-NEXT:    vmv.v.i v10, 6
 ; RV64-NEXT:    vmv.v.i v11, 0
 ; RV64-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
 ; RV64-NEXT:    vslideup.vi v11, v10, 5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -943,32 +943,32 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:    vmv.s.x v9, a1
-; CHECK-NEXT:    li a1, 33
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vmv.v.i v10, 3
-; CHECK-NEXT:    vmerge.vim v10, v10, 2, v0
-; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
-; CHECK-NEXT:    vslideup.vi v10, v9, 6
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vmv.v.i v9, 0
 ; CHECK-NEXT:    lui a1, 1048568
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vmv.s.x v12, a1
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    vmv.s.x v10, a1
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v11, 1
 ; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
-; CHECK-NEXT:    vslideup.vi v11, v9, 6
+; CHECK-NEXT:    vslideup.vi v9, v11, 6
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    lui a1, %hi(.LCPI53_0)
 ; CHECK-NEXT:    addi a1, a1, %lo(.LCPI53_0)
-; CHECK-NEXT:    vle16.v v9, (a1)
-; CHECK-NEXT:    vsrl.vv v11, v8, v11
-; CHECK-NEXT:    vmulhu.vv v9, v11, v9
+; CHECK-NEXT:    vle16.v v12, (a1)
+; CHECK-NEXT:    vsrl.vv v9, v8, v9
+; CHECK-NEXT:    vmulhu.vv v9, v9, v12
 ; CHECK-NEXT:    vsub.vv v8, v8, v9
-; CHECK-NEXT:    vmulhu.vv v8, v8, v12
+; CHECK-NEXT:    vmulhu.vv v8, v8, v10
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vv v8, v8, v10
+; CHECK-NEXT:    li a1, 33
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vmv.v.i v9, 3
+; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
+; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; CHECK-NEXT:    vslideup.vi v9, v11, 6
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vv v8, v8, v9
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x i16>, <8 x i16>* %x
@@ -995,8 +995,7 @@
 ; CHECK-NEXT:    vsub.vv v8, v8, v9
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v10
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
-; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:    vmv.s.x v9, a1
+; CHECK-NEXT:    vmv.v.i v9, 1
 ; CHECK-NEXT:    vmv.v.i v10, 2
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v10, v9, 3
@@ -1205,9 +1204,8 @@
 ; RV32-NEXT:    vrsub.vi v10, v10, 0
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vmadd.vv v10, v8, v9
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vmv.s.x v8, a1
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 1
 ; RV32-NEXT:    vmv.v.i v9, 0
 ; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32-NEXT:    vslideup.vi v9, v8, 2
@@ -4605,8 +4603,7 @@
 ; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v12
 ; LMULMAX1-RV32-NEXT:    vmulhu.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
-; LMULMAX1-RV32-NEXT:    li a2, 1
-; LMULMAX1-RV32-NEXT:    vmv.s.x v12, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.i v12, 1
 ; LMULMAX1-RV32-NEXT:    vmv.v.i v13, 2
 ; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
 ; LMULMAX1-RV32-NEXT:    vslideup.vi v13, v12, 3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -189,9 +189,8 @@
 define <2 x i64> @stepvector_v2i64() {
 ; RV32LMULMAX1-LABEL: stepvector_v2i64:
 ; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    li a0, 1
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vmv.v.i v9, 1
 ; RV32LMULMAX1-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
@@ -205,9 +204,8 @@
 ;
 ; RV32LMULMAX2-LABEL: stepvector_v2i64:
 ; RV32LMULMAX2:       # %bb.0:
-; RV32LMULMAX2-NEXT:    li a0, 1
 ; RV32LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX2-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX2-NEXT:    vmv.v.i v9, 1
 ; RV32LMULMAX2-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX2-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX2-NEXT:    vslideup.vi v8, v9, 2
@@ -227,9 +225,8 @@
 define <4 x i64> @stepvector_v4i64() {
 ; RV32LMULMAX1-LABEL: stepvector_v4i64:
 ; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    li a0, 1
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vmv.v.i v9, 1
 ; RV32LMULMAX1-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
@@ -268,9 +265,8 @@
 define <8 x i64> @stepvector_v8i64() {
 ; RV32LMULMAX1-LABEL: stepvector_v8i64:
 ; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    li a0, 1
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vmv.v.i v9, 1
 ; RV32LMULMAX1-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
@@ -321,9 +317,8 @@
 define <16 x i64> @stepvector_v16i64() {
 ; RV32LMULMAX1-LABEL: stepvector_v16i64:
 ; RV32LMULMAX1:       # %bb.0:
-; RV32LMULMAX1-NEXT:    li a0, 1
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vmv.v.i v9, 1
 ; RV32LMULMAX1-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
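Note (editorial aside, not part of the patch): the heuristic lowerScalarInsert applies when choosing between vmv.v.i and vmv.s.x can be summarized as a standalone predicate. The sketch below is illustrative only; the helper name and plain-integer interface are hypothetical, since the real code operates on SDValue/MVT nodes inside SelectionDAG.

    // Sketch of the immediate-selection rule encoded by lowerScalarInsert:
    // prefer vmv.v.i only for a known non-zero constant that fits the 5-bit
    // signed immediate field, and only at LMUL <= 1, so the whole-register
    // splat does not over-constrain the register allocator.
    #include <cstdint>

    bool canUseVmvVI(bool isConstant, int64_t value, unsigned lmul) {
      if (!isConstant)
        return false;               // non-constants go through vmv.s.x
      if (value == 0)
        return false;               // zero stays on the existing vmv.s.x path
      if (value < -16 || value > 15)
        return false;               // outside simm5, no .vi encoding exists
      return lmul <= 1;             // LMUL2+ would constrain RA for no benefit
    }

Under this rule, the tests above that previously materialized small constants with li plus vmv.s.x now fold them directly into a vmv.v.i immediate.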