diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1462,29 +1462,42 @@
 // FIXME: We can optimize this when the type has sign or zero bits in one
 // of the halves.
 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
-                                   SDValue VL, SelectionDAG &DAG) {
-  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT);
+                                   SDValue VL, SelectionDAG &DAG,
+                                   const RISCVSubtarget &Subtarget) {
   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                            DAG.getConstant(0, DL, MVT::i32));
   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                            DAG.getConstant(1, DL, MVT::i32));
 
-  // vmv.v.x vX, hi
-  // vsll.vx vX, vX, /*32*/
-  // vmv.v.x vY, lo
-  // vsll.vx vY, vY, /*32*/
-  // vsrl.vx vY, vY, /*32*/
-  // vor.vv vX, vX, vY
-  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
-  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
-  Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
-  Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
-  Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
+  // Fall back to a stack store and stride x0 vector load.
+  MachineFunction &MF = DAG.getMachineFunction();
+  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
+
+  // We reuse the frame index used for moving two i32s into a 64-bit FPR,
+  // since this is an analogous operation.
+  int FI = FuncInfo->getMoveF64FrameIndex(MF);
+  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue StackSlot =
+      DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()));
 
-  Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL);
-  Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL);
+  SDValue Chain = DAG.getEntryNode();
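+  // RV32 is little-endian, so storing the low half at offset 0 and the high
+  // half at offset 4 forms the full i64 value in the stack slot.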
+  Lo = DAG.getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
+
+  SDValue OffsetSlot =
+      DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+  Hi = DAG.getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), Align(8));
+
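+  // Join the two store chains so the vector load below is ordered after
+  // both 32-bit stores.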
+  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+
+  MVT XLenVT = Subtarget.getXLenVT();
+  SDVTList VTs = DAG.getVTList({VT, MVT::Other});
+  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
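+  // Passing x0 as the stride gives a stride of zero, so every element of the
+  // vlse64 reads the same address and the stored i64 is splatted across the
+  // vector.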
+  SDValue Ops[] = {Chain, IntID, StackSlot, DAG.getRegister(RISCV::X0, XLenVT),
+                   VL};
 
-  return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL);
+  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64,
+                                 MPI, Align(8), MachineMemOperand::MOLoad);
 }
 
 // This function lowers a splat of a scalar operand Splat with the vector
@@ -1523,7 +1536,7 @@
   }
 
   // Otherwise use the more complicated splatting algorithm.
-  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
+  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG, Subtarget);
 }
 
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
@@ -3015,7 +3028,7 @@
   // VL should be the last operand.
   SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
   assert(VL.getValueType() == XLenVT);
-  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
+  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG, Subtarget);
   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
 }
 
@@ -3079,7 +3092,8 @@
     SDValue Vec = Op.getOperand(1);
     SDValue VL = Op.getOperand(3);
 
-    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
+    SDValue SplattedVal =
+        splatSplitI64WithVL(DL, VT, Scalar, VL, DAG, Subtarget);
     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                                       DAG.getConstant(0, DL, MVT::i32), VL);
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vaadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vaadd.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vaadd.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vaadd_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vaadd.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vaadd.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vaadd_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vaadd.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vaadd.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vaadd_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vaadd.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vaadd.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vaadd_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vaadd.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vaadd.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vaadd_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vaadd.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vaadd.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vaadd_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vaadd.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vaadd.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vaadd.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vaaddu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vaaddu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vaaddu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vaaddu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vaaddu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vaaddu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vaaddu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vaaddu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vaaddu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vaaddu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vaaddu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vaaddu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vaaddu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vaaddu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vaaddu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vaaddu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vaaddu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vaaddu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vaaddu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vaaddu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vaaddu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vaaddu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll
@@ -890,15 +890,14 @@
 define <vscale x 1 x i64> @intrinsic_vadc_vxm_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vadc_vxm_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vadc.vvm v8, v8, v25, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vadc.nxv1i64.i64(
@@ -919,15 +918,14 @@
 define <vscale x 2 x i64> @intrinsic_vadc_vxm_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vadc_vxm_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vadc.vvm v8, v8, v26, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vadc.nxv2i64.i64(
@@ -948,15 +946,14 @@
 define <vscale x 4 x i64> @intrinsic_vadc_vxm_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vadc_vxm_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vadc.vvm v8, v8, v28, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vadc.nxv4i64.i64(
@@ -977,15 +974,14 @@
 define <vscale x 8 x i64> @intrinsic_vadc_vxm_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vadc_vxm_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vadc.vvm v8, v8, v16, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vadc.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vadd.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vadd_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vadd.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vadd_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vadd.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vadd.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vadd_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vadd.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vadd.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vadd_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vadd.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vadd.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vadd_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vadd.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vadd.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vadd_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vadd.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vadd.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vand_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vand_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vand.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vand.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vand.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vand.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vand_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vand_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vand.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vand.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vand.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vand.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vand_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vand_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vand.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vand.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vand.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vand.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vand_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vand_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vand.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vand.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vasub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vasub.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vasub.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vasub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vasub.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vasub.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vasub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vasub.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vasub.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vasub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vasub.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vasub.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vasub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vasub.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vasub.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vasub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vasub.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vasub.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vasub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vasub.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vasub.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vasub.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vasubu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vasubu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vasubu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vasubu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vasubu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vasubu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vasubu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vasubu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vasubu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vasubu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vasubu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vasubu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vasubu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vasubu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vasubu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vasubu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vasubu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vasubu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vasubu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vasubu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vasubu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vasubu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vdiv_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdiv_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vdiv.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vdiv.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vdiv_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vdiv.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vdiv.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vdiv_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdiv_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vdiv.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vdiv.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vdiv_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vdiv.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vdiv.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vdiv_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdiv_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vdiv.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vdiv.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vdiv_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vdiv.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vdiv.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vdiv_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdiv_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vdiv.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vdiv.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vdivu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdivu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vdivu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vdivu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vdivu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vdivu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vdivu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vdivu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdivu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vdivu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vdivu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vdivu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vdivu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vdivu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vdivu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdivu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vdivu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vdivu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vdivu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vdivu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vdivu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vdivu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vdivu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vdivu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vdivu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll
@@ -1528,16 +1528,15 @@
 define <vscale x 1 x i64>  @intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmacc.vv v8, v25, v9
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64(
@@ -1559,16 +1558,15 @@
 define <vscale x 1 x i64> @intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmacc.vv v8, v25, v9, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmacc.mask.nxv1i64.i64(
@@ -1590,16 +1588,15 @@
 define <vscale x 2 x i64>  @intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmacc.vv v8, v26, v10
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64(
@@ -1621,16 +1618,15 @@
 define <vscale x 2 x i64> @intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmacc.vv v8, v26, v10, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmacc.mask.nxv2i64.i64(
@@ -1652,16 +1648,15 @@
 define <vscale x 4 x i64>  @intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmacc.vv v8, v28, v12
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64(
@@ -1683,16 +1678,15 @@
 define <vscale x 4 x i64> @intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmacc.vv v8, v28, v12, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmacc.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll
@@ -809,15 +809,14 @@
 define <vscale x 1 x i1> @intrinsic_vmadc_vx_nxv1i1_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc_vx_nxv1i1_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmadc.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.nxv1i64.i64(
@@ -836,15 +835,14 @@
 define <vscale x 2 x i1> @intrinsic_vmadc_vx_nxv2i1_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc_vx_nxv2i1_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmadc.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.nxv2i64.i64(
@@ -863,15 +861,14 @@
 define <vscale x 4 x i1> @intrinsic_vmadc_vx_nxv4i1_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc_vx_nxv4i1_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmadc.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.nxv4i64.i64(
@@ -890,15 +887,14 @@
 define <vscale x 8 x i1> @intrinsic_vmadc_vx_nxv8i1_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc_vx_nxv8i1_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmadc.vv v0, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll
@@ -930,16 +930,15 @@
 define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v26, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmadc.vvm v25, v8, v26, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i64.i64(
@@ -960,16 +959,15 @@
 define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmadc.vvm v25, v8, v26, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmadc.carry.in.nxv2i64.i64(
@@ -990,16 +988,15 @@
 define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmadc.vvm v25, v8, v28, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmadc.carry.in.nxv4i64.i64(
@@ -1020,16 +1017,15 @@
 define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmadc.vvm v25, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll
@@ -1528,16 +1528,15 @@
 define <vscale x 1 x i64>  @intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmadd.vv v8, v25, v9
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64(
@@ -1559,16 +1558,15 @@
 define <vscale x 1 x i64> @intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmadd.vv v8, v25, v9, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmadd.mask.nxv1i64.i64(
@@ -1590,16 +1588,15 @@
 define <vscale x 2 x i64>  @intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmadd.vv v8, v26, v10
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64(
@@ -1621,16 +1618,15 @@
 define <vscale x 2 x i64> @intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmadd.vv v8, v26, v10, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmadd.mask.nxv2i64.i64(
@@ -1652,16 +1648,15 @@
 define <vscale x 4 x i64>  @intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmadd.vv v8, v28, v12
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64(
@@ -1683,16 +1678,15 @@
 define <vscale x 4 x i64> @intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmadd.vv v8, v28, v12, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmadd.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vmax_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmax_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmax.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmax.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vmax_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmax.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmax.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vmax_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmax_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmax.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmax.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vmax_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmax.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmax.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vmax_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmax_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmax.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmax.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vmax_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmax.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmax.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vmax_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmax_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmax.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmax.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmax.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vmaxu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmaxu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vmaxu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmaxu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmaxu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vmaxu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmaxu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vmaxu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmaxu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmaxu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vmaxu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmaxu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vmaxu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmaxu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmaxu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vmaxu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmaxu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmaxu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmaxu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
@@ -890,15 +890,14 @@
 define <vscale x 1 x i64> @intrinsic_vmerge_vxm_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v25, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmerge.nxv1i64.i64(
@@ -919,15 +918,14 @@
 define <vscale x 2 x i64> @intrinsic_vmerge_vxm_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v26, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmerge.nxv2i64.i64(
@@ -948,15 +946,14 @@
 define <vscale x 4 x i64> @intrinsic_vmerge_vxm_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmerge.nxv4i64.i64(
@@ -977,15 +974,14 @@
 define <vscale x 8 x i64> @intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmerge.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vmin_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmin.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmin.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vmin_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmin.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmin.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vmin_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmin.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmin.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vmin_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmin.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmin.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vmin_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmin.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmin.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vmin_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmin.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmin.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vmin_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmin.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmin.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmin.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vminu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vminu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vminu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vminu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vminu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vminu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vminu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vminu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vminu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vminu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vminu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vminu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vminu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vminu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vminu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vminu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vminu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vminu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vminu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vminu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vminu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vminu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll
@@ -809,15 +809,14 @@
 define <vscale x 1 x i1> @intrinsic_vmsbc_vx_nxv1i1_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv1i1_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmsbc.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.nxv1i64.i64(
@@ -836,15 +835,14 @@
 define <vscale x 2 x i1> @intrinsic_vmsbc_vx_nxv2i1_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv2i1_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsbc.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.nxv2i64.i64(
@@ -863,15 +861,14 @@
 define <vscale x 4 x i1> @intrinsic_vmsbc_vx_nxv4i1_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv4i1_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmsbc.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.nxv4i64.i64(
@@ -890,15 +887,14 @@
 define <vscale x 8 x i1> @intrinsic_vmsbc_vx_nxv8i1_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv8i1_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmsbc.vv v0, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll
@@ -930,16 +930,15 @@
 define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v26, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsbc.vvm v25, v8, v26, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i64.i64(
@@ -960,16 +959,15 @@
 define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsbc.vvm v25, v8, v26, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsbc.borrow.in.nxv2i64.i64(
@@ -990,16 +988,15 @@
 define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmsbc.vvm v25, v8, v28, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsbc.borrow.in.nxv4i64.i64(
@@ -1020,16 +1017,15 @@
 define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmsbc.vvm v25, v8, v16, v0
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll
@@ -1650,15 +1650,14 @@
 define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmseq.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64(
@@ -1679,19 +1678,18 @@
 define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmseq.vv v25, v8, v26, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmseq.vv v26, v8, v25, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64.i64(
@@ -1712,15 +1710,14 @@
 define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmseq.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64.i64(
@@ -1741,19 +1738,18 @@
 define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmseq.vv v25, v8, v26, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64(
@@ -1774,15 +1770,14 @@
 define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmseq.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64.i64(
@@ -1803,19 +1798,18 @@
 define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmseq.vv v25, v8, v28, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
@@ -714,15 +714,14 @@
 define <vscale x 1 x i1> @intrinsic_vmsgt_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmslt.vv v0, v25, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
@@ -743,19 +742,18 @@
 define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmslt.vv v25, v26, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmslt.vv v26, v25, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64.i64(
@@ -776,15 +774,14 @@
 define <vscale x 2 x i1> @intrinsic_vmsgt_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmslt.vv v0, v26, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64.i64(
@@ -805,19 +802,18 @@
 define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmslt.vv v25, v26, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64(
@@ -838,15 +834,14 @@
 define <vscale x 4 x i1> @intrinsic_vmsgt_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmslt.vv v0, v28, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64.i64(
@@ -867,19 +862,18 @@
 define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmslt.vv v25, v28, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll
@@ -714,15 +714,14 @@
 define <vscale x 1 x i1> @intrinsic_vmsgtu_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmsltu.vv v0, v25, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i64.i64(
@@ -743,19 +742,18 @@
 define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsltu.vv v25, v26, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmsltu.vv v26, v25, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i64.i64(
@@ -776,15 +774,14 @@
 define <vscale x 2 x i1> @intrinsic_vmsgtu_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsltu.vv v0, v26, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64.i64(
@@ -805,19 +802,18 @@
 define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmsltu.vv v25, v26, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64(
@@ -838,15 +834,14 @@
 define <vscale x 4 x i1> @intrinsic_vmsgtu_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmsltu.vv v0, v28, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64.i64(
@@ -867,19 +862,18 @@
 define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmsltu.vv v25, v28, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll
@@ -1650,15 +1650,14 @@
 define <vscale x 1 x i1> @intrinsic_vmsle_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmsle.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsle.nxv1i64.i64(
@@ -1679,19 +1678,18 @@
 define <vscale x 1 x i1> @intrinsic_vmsle_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsle.vv v25, v8, v26, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmsle.vv v26, v8, v25, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i64.i64(
@@ -1712,15 +1710,14 @@
 define <vscale x 2 x i1> @intrinsic_vmsle_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsle.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.nxv2i64.i64(
@@ -1741,19 +1738,18 @@
 define <vscale x 2 x i1> @intrinsic_vmsle_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmsle.vv v25, v8, v26, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64.i64(
@@ -1774,15 +1770,14 @@
 define <vscale x 4 x i1> @intrinsic_vmsle_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmsle.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i64.i64(
@@ -1803,19 +1798,18 @@
 define <vscale x 4 x i1> @intrinsic_vmsle_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmsle.vv v25, v8, v28, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll
@@ -1650,15 +1650,14 @@
 define <vscale x 1 x i1> @intrinsic_vmsleu_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmsleu.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsleu.nxv1i64.i64(
@@ -1679,19 +1678,18 @@
 define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsleu.vv v25, v8, v26, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmsleu.vv v26, v8, v25, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i64.i64(
@@ -1712,15 +1710,14 @@
 define <vscale x 2 x i1> @intrinsic_vmsleu_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsleu.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.nxv2i64.i64(
@@ -1741,19 +1738,18 @@
 define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmsleu.vv v25, v8, v26, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64.i64(
@@ -1774,15 +1770,14 @@
 define <vscale x 4 x i1> @intrinsic_vmsleu_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmsleu.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i64.i64(
@@ -1803,19 +1798,18 @@
 define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmsleu.vv v25, v8, v28, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll
@@ -1650,15 +1650,14 @@
 define <vscale x 1 x i1> @intrinsic_vmslt_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmslt.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmslt.nxv1i64.i64(
@@ -1679,19 +1678,18 @@
 define <vscale x 1 x i1> @intrinsic_vmslt_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmslt.vv v25, v8, v26, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmslt.vv v26, v8, v25, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i64.i64(
@@ -1712,15 +1710,14 @@
 define <vscale x 2 x i1> @intrinsic_vmslt_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmslt.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i64.i64(
@@ -1741,19 +1738,18 @@
 define <vscale x 2 x i1> @intrinsic_vmslt_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmslt.vv v25, v8, v26, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64.i64(
@@ -1774,15 +1770,14 @@
 define <vscale x 4 x i1> @intrinsic_vmslt_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmslt.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i64.i64(
@@ -1803,19 +1798,18 @@
 define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmslt.vv v25, v8, v28, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
@@ -1650,15 +1650,14 @@
 define <vscale x 1 x i1> @intrinsic_vmsltu_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmsltu.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsltu.nxv1i64.i64(
@@ -1679,19 +1678,18 @@
 define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsltu.vv v25, v8, v26, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmsltu.vv v26, v8, v25, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i64.i64(
@@ -1712,15 +1710,14 @@
 define <vscale x 2 x i1> @intrinsic_vmsltu_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsltu.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.nxv2i64.i64(
@@ -1741,19 +1738,18 @@
 define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmsltu.vv v25, v8, v26, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64.i64(
@@ -1774,15 +1770,14 @@
 define <vscale x 4 x i1> @intrinsic_vmsltu_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmsltu.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i64.i64(
@@ -1803,19 +1798,18 @@
 define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmsltu.vv v25, v8, v28, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll
@@ -1650,15 +1650,14 @@
 define <vscale x 1 x i1> @intrinsic_vmsne_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmsne.vv v0, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsne.nxv1i64.i64(
@@ -1679,19 +1678,18 @@
 define <vscale x 1 x i1> @intrinsic_vmsne_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v27, a0
-; CHECK-NEXT:    vsll.vx v27, v27, a1
-; CHECK-NEXT:    vsrl.vx v27, v27, a1
-; CHECK-NEXT:    vor.vv v26, v27, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
+; CHECK-NEXT:    vmv1r.v v26, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsne.vv v25, v8, v26, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    vmsne.vv v26, v8, v25, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i64.i64(
@@ -1712,15 +1710,14 @@
 define <vscale x 2 x i1> @intrinsic_vmsne_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmsne.vv v0, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.nxv2i64.i64(
@@ -1741,19 +1738,18 @@
 define <vscale x 2 x i1> @intrinsic_vmsne_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vmsne.vv v25, v8, v26, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64.i64(
@@ -1774,15 +1770,14 @@
 define <vscale x 4 x i1> @intrinsic_vmsne_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmsne.vv v0, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i64.i64(
@@ -1803,19 +1798,18 @@
 define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmv1r.v v25, v0
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vmsne.vv v25, v8, v28, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vmul_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmul.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmul.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vmul_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmul.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmul.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vmul_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmul.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmul.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vmul_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmul.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmul.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vmul_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmul.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmul.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vmul_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmul.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmul.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vmul_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmul.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmul.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vmul_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmul.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vmulh_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmulh.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vmulh_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmulh.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vmulh_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmulh.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vmulh_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmulh.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vmulh_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmulh.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vmulh_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmulh.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vmulh_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmulh.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmulh.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vmulhsu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmulhsu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vmulhsu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmulhsu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vmulhsu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmulhsu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vmulhsu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmulhsu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vmulhsu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmulhsu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vmulhsu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmulhsu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vmulhsu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmulhsu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmulhsu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vmulhu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vmulhu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vmulhu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vmulhu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vmulhu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vmulhu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vmulhu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vmulhu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vmulhu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vmulhu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vmulhu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vmulhu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
@@ -240,17 +240,16 @@
 define <vscale x 1 x i64> @intrinsic_vmv.s.x_x_nxv1i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vid.v v26
 ; CHECK-NEXT:    vmseq.vi v0, v26, 0
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v25, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmv.s.x.nxv1i64(<vscale x 1 x i64> %0, i64 %1, i32 %2)
@@ -262,17 +261,16 @@
 define <vscale x 2 x i64> @intrinsic_vmv.s.x_x_nxv2i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vid.v v28
 ; CHECK-NEXT:    vmseq.vi v0, v28, 0
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v26, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmv.s.x.nxv2i64(<vscale x 2 x i64> %0, i64 %1, i32 %2)
@@ -284,17 +282,16 @@
 define <vscale x 4 x i64> @intrinsic_vmv.s.x_x_nxv4i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vmseq.vi v0, v12, 0
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmv.s.x.nxv4i64(<vscale x 4 x i64> %0, i64 %1, i32 %2)
@@ -306,17 +303,16 @@
 define <vscale x 8 x i64> @intrinsic_vmv.s.x_x_nxv8i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vid.v v24
 ; CHECK-NEXT:    vmseq.vi v0, v24, 0
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmv.s.x.nxv8i64(<vscale x 8 x i64> %0, i64 %1, i32 %2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll
@@ -332,14 +332,13 @@
 define <vscale x 1 x i64> @intrinsic_vmv.v.x_x_nxv1i64(i64 %0, i32 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v8, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v8, (a0), zero
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vmv.v.x.nxv1i64(
@@ -356,14 +355,13 @@
 define <vscale x 2 x i64> @intrinsic_vmv.v.x_x_nxv2i64(i64 %0, i32 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v8, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v8, (a0), zero
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vmv.v.x.nxv2i64(
@@ -380,14 +378,13 @@
 define <vscale x 4 x i64> @intrinsic_vmv.v.x_x_nxv4i64(i64 %0, i32 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsll.vx v8, v8, a1
-; CHECK-NEXT:    vsrl.vx v8, v8, a1
-; CHECK-NEXT:    vor.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v8, (a0), zero
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vmv.v.x.nxv4i64(
@@ -404,14 +401,13 @@
 define <vscale x 8 x i64> @intrinsic_vmv.v.x_x_nxv8i64(i64 %0, i32 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v8, v8, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v8, v16, v8
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v8, (a0), zero
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll
@@ -1528,16 +1528,15 @@
 define <vscale x 1 x i64>  @intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vnmsac.vv v8, v25, v9
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64(
@@ -1559,16 +1558,15 @@
 define <vscale x 1 x i64> @intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vnmsac.vv v8, v25, v9, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vnmsac.mask.nxv1i64.i64(
@@ -1590,16 +1588,15 @@
 define <vscale x 2 x i64>  @intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vnmsac.vv v8, v26, v10
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64(
@@ -1621,16 +1618,15 @@
 define <vscale x 2 x i64> @intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vnmsac.vv v8, v26, v10, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vnmsac.mask.nxv2i64.i64(
@@ -1652,16 +1648,15 @@
 define <vscale x 4 x i64>  @intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vnmsac.vv v8, v28, v12
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64(
@@ -1683,16 +1678,15 @@
 define <vscale x 4 x i64> @intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vnmsac.vv v8, v28, v12, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vnmsac.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
@@ -1528,16 +1528,15 @@
 define <vscale x 1 x i64>  @intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vnmsub.vv v8, v25, v9
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64(
@@ -1559,16 +1558,15 @@
 define <vscale x 1 x i64> @intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i64> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vnmsub.vv v8, v25, v9, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vnmsub.mask.nxv1i64.i64(
@@ -1590,16 +1588,15 @@
 define <vscale x 2 x i64>  @intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vnmsub.vv v8, v26, v10
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64(
@@ -1621,16 +1618,15 @@
 define <vscale x 2 x i64> @intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i64> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vnmsub.vv v8, v26, v10, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vnmsub.mask.nxv2i64.i64(
@@ -1652,16 +1648,15 @@
 define <vscale x 4 x i64>  @intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vnmsub.vv v8, v28, v12
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64(
@@ -1683,16 +1678,15 @@
 define <vscale x 4 x i64> @intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i64> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vnmsub.vv v8, v28, v12, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vnmsub.mask.nxv4i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vor_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vor_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vor.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vor.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vor.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vor.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vor_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vor_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vor.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vor.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vor.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vor.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vor_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vor_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vor.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vor.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vor.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vor.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vor_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vor_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vor.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vor.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vrem_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vrem.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vrem.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vrem_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vrem.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vrem.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vrem_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vrem.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vrem.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vrem_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vrem.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vrem.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vrem_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vrem.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vrem.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vrem_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vrem.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vrem.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vrem_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vrem.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vrem.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vrem.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vremu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vremu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vremu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vremu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vremu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vremu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vremu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vremu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vremu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vremu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vremu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vremu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vremu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vremu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vremu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vremu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vremu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vremu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vremu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vremu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vremu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vremu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
@@ -801,15 +801,14 @@
 define <vscale x 1 x i64> @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v25, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vrsub.nxv1i64.i64(
@@ -830,16 +829,15 @@
 define <vscale x 1 x i64> @intrinsic_vrsub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vsub.vv v8, v25, v9, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vrsub.mask.nxv1i64.i64(
@@ -860,15 +858,14 @@
 define <vscale x 2 x i64> @intrinsic_vrsub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrsub_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v26, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vrsub.nxv2i64.i64(
@@ -889,16 +886,15 @@
 define <vscale x 2 x i64> @intrinsic_vrsub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vsub.vv v8, v26, v10, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vrsub.mask.nxv2i64.i64(
@@ -919,15 +915,14 @@
 define <vscale x 4 x i64> @intrinsic_vrsub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrsub_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v28, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vrsub.nxv4i64.i64(
@@ -948,16 +943,15 @@
 define <vscale x 4 x i64> @intrinsic_vrsub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vsub.vv v8, v28, v12, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vrsub.mask.nxv4i64.i64(
@@ -978,15 +972,14 @@
 define <vscale x 8 x i64> @intrinsic_vrsub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrsub_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v16, v8
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vrsub.nxv8i64.i64(
@@ -1008,24 +1001,13 @@
 ; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsub.vv v8, v24, v16, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsadd.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsadd.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vsadd.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsadd.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsadd.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsadd.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vsadd.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsadd.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsadd.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsadd.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vsadd.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsadd.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vsadd.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsadd.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsadd.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsaddu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsaddu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vsaddu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsaddu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsaddu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsaddu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vsaddu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsaddu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsaddu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsaddu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vsaddu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsaddu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vsaddu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsaddu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsaddu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
@@ -890,15 +890,14 @@
 define <vscale x 1 x i64> @intrinsic_vsbc_vxm_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsbc_vxm_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsbc.vvm v8, v8, v25, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsbc.nxv1i64.i64(
@@ -919,15 +918,14 @@
 define <vscale x 2 x i64> @intrinsic_vsbc_vxm_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsbc_vxm_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsbc.vvm v8, v8, v26, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsbc.nxv2i64.i64(
@@ -948,15 +946,14 @@
 define <vscale x 4 x i64> @intrinsic_vsbc_vxm_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsbc_vxm_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsbc.vvm v8, v8, v28, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsbc.nxv4i64.i64(
@@ -977,15 +974,14 @@
 define <vscale x 8 x i64> @intrinsic_vsbc_vxm_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsbc_vxm_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vsbc.vvm v8, v8, v16, v0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsbc.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsmul.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vsmul.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsmul.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vsmul.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsmul.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vsmul.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vsmul.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsmul.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vssub.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vssub.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vssub.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vssub.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vssub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssub_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vssub.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vssub.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vssub.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vssub.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vssub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssub_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vssub.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vssub.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vssub.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vssub.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vssub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssub_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vssub.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vssub.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vssub.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vssubu.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vssubu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vssubu.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vssubu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vssubu.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vssubu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vssubu.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vssubu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vssubu.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vssubu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vssubu.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vssubu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vssubu.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vssubu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vssubu.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vsub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vsub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vsub.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsub.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vsub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsub.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vsub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vsub.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vsub.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vsub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsub.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vsub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vsub.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vsub.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vsub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsub.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsub.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
@@ -1773,15 +1773,14 @@
 define <vscale x 1 x i64> @intrinsic_vxor_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vxor.vv v8, v8, v25
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vxor.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
 define <vscale x 1 x i64> @intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.x v25, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v25, v25, a1
-; CHECK-NEXT:    vmv.v.x v26, a0
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vsrl.vx v26, v26, a1
-; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v25, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m1,tu,mu
 ; CHECK-NEXT:    vxor.vv v8, v9, v25, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vxor.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
 define <vscale x 2 x i64> @intrinsic_vxor_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vxor.vv v8, v8, v26
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vxor.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
 define <vscale x 2 x i64> @intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.x v26, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v26, v26, a1
-; CHECK-NEXT:    vmv.v.x v28, a0
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vsrl.vx v28, v28, a1
-; CHECK-NEXT:    vor.vv v26, v28, v26
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m2,tu,mu
 ; CHECK-NEXT:    vxor.vv v8, v10, v26, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vxor.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
 define <vscale x 4 x i64> @intrinsic_vxor_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsll.vx v12, v12, a1
-; CHECK-NEXT:    vsrl.vx v12, v12, a1
-; CHECK-NEXT:    vor.vv v28, v12, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vxor.vv v8, v8, v28
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vxor.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
 define <vscale x 4 x i64> @intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.x v28, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v28, v28, a1
-; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vsrl.vx v16, v16, a1
-; CHECK-NEXT:    vor.vv v28, v16, v28
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v28, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m4,tu,mu
 ; CHECK-NEXT:    vxor.vv v8, v12, v28, v0.t
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vxor.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
 define <vscale x 8 x i64> @intrinsic_vxor_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v16, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v16, v16, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v16, v24, v16
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK-NEXT:    vxor.vv v8, v8, v16
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vxor.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
 ; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrrs a3, vlenb, zero
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    addi a1, zero, 32
-; CHECK-NEXT:    vsll.vx v0, v24, a1
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vsll.vx v24, v24, a1
-; CHECK-NEXT:    vsrl.vx v24, v24, a1
-; CHECK-NEXT:    vor.vv v24, v24, v0
+; CHECK-NEXT:    sw a1, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vxor.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    csrrs a0, vlenb, zero
-; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry: