diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1462,29 +1462,42 @@
 // FIXME: We can optimize this when the type has sign or zero bits in one
 // of the halves.
 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
-                                   SDValue VL, SelectionDAG &DAG) {
-  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT);
+                                   SDValue VL, SelectionDAG &DAG,
+                                   const RISCVSubtarget &Subtarget) {
   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                            DAG.getConstant(0, DL, MVT::i32));
   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                            DAG.getConstant(1, DL, MVT::i32));
-  // vmv.v.x vX, hi
-  // vsll.vx vX, vX, /*32*/
-  // vmv.v.x vY, lo
-  // vsll.vx vY, vY, /*32*/
-  // vsrl.vx vY, vY, /*32*/
-  // vor.vv vX, vX, vY
-  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
-  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
-  Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
-  Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
-  Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
+  // Fall back to a stack store and stride x0 vector load.
+  MachineFunction &MF = DAG.getMachineFunction();
+  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
+
+  // We use the same frame index we use for moving two i32s into 64-bit FPR.
+  // This is an analogous operation.
+  int FI = FuncInfo->getMoveF64FrameIndex(MF);
+  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue StackSlot =
+      DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()));
 
-  Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL);
-  Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL);
+  SDValue Chain = DAG.getEntryNode();
+  Lo = DAG.getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
+
+  SDValue OffsetSlot =
+      DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+  Hi = DAG.getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), Align(8));
+
+  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+
+  MVT XLenVT = Subtarget.getXLenVT();
+  SDVTList VTs = DAG.getVTList({VT, MVT::Other});
+  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
+  SDValue Ops[] = {Chain, IntID, StackSlot, DAG.getRegister(RISCV::X0, XLenVT),
+                   VL};
 
-  return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL);
+  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64,
+                                 MPI, Align(8), MachineMemOperand::MOLoad);
 }
 
 // This function lowers a splat of a scalar operand Splat with the vector
@@ -1523,7 +1536,7 @@
   }
 
   // Otherwise use the more complicated splatting algorithm.
-  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
+  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG, Subtarget);
 }
 
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
@@ -3015,7 +3028,7 @@
   // VL should be the last operand.
SDValue VL = Op.getOperand(Op.getNumOperands() - 1); assert(VL.getValueType() == XLenVT); - ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG); + ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG, Subtarget); return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); } @@ -3079,7 +3092,8 @@ SDValue Vec = Op.getOperand(1); SDValue VL = Op.getOperand(3); - SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); + SDValue SplattedVal = + splatSplitI64WithVL(DL, VT, Scalar, VL, DAG, Subtarget); SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getConstant(0, DL, MVT::i32), VL); diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vaadd_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaadd_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vaadd.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaadd.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vaadd_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vaadd.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaadd.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vaadd_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaadd_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vaadd.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaadd.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vaadd_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vaadd.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaadd.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vaadd_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaadd_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vaadd.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaadd.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vaadd_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vaadd.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaadd.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vaadd_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaadd_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vaadd.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaadd.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; 
CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vaadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vaaddu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vaaddu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaaddu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vaaddu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vaaddu.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaaddu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vaaddu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vaaddu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaaddu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vaaddu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) 
nounwind { ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vaaddu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaaddu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vaaddu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vaaddu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaaddu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vaaddu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vaaddu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaaddu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vaaddu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vaaddu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vaaddu.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vaaddu.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, 
sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vaaddu.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll @@ -890,15 +890,14 @@ define @intrinsic_vadc_vxm_nxv1i64_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vadc_vxm_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vadc.vvm v8, v8, v25, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadc.nxv1i64.i64( @@ -919,15 +918,14 @@ define @intrinsic_vadc_vxm_nxv2i64_nxv2i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vadc_vxm_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vadc.vvm v8, v8, v26, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadc.nxv2i64.i64( @@ -948,15 +946,14 @@ define @intrinsic_vadc_vxm_nxv4i64_nxv4i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vadc_vxm_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vadc.vvm v8, v8, v28, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadc.nxv4i64.i64( @@ -977,15 +974,14 @@ define 
@intrinsic_vadc_vxm_nxv8i64_nxv8i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vadc_vxm_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vadc.vvm v8, v8, v16, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadc.nxv8i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vadd_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vadd_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vadd.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadd.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vadd_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vadd.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadd.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vadd_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vadd_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vadd.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadd.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define 
@intrinsic_vadd_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vadd.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadd.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vadd_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vadd_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vadd.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadd.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vadd_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vadd.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadd.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vadd_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vadd_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vadd.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; 
CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vand_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vand_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vand.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vand.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vand.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vand.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vand_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vand_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vand.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call 
@llvm.riscv.vand.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vand.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vand.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vand_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vand_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vand.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vand.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vand.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vand.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vand_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vand_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vand.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi 
sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vand.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vasub_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasub_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vasub.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasub.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vasub_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vasub.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasub.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vasub_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasub_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vasub.vv v8, v8, v26 +; CHECK-NEXT: 
addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasub.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vasub_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vasub.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasub.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vasub_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasub_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vasub.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasub.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vasub_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vasub.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasub.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vasub_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasub_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vasub.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasub.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: 
intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vasub.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vasubu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasubu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vasubu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasubu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vasubu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vasubu.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasubu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vasubu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasubu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: 
addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vasubu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasubu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vasubu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vasubu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasubu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vasubu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasubu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vasubu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasubu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vasubu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vasubu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasubu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vasubu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vasubu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vasubu.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vasubu.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vasubu.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vdiv_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdiv_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vdiv.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdiv.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vdiv_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vdiv.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdiv.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vdiv_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdiv_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 
12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vdiv.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdiv.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vdiv_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vdiv.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdiv.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vdiv_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdiv_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vdiv.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdiv.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vdiv_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vdiv.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdiv.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vdiv_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdiv_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; 
CHECK-NEXT: vdiv.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdiv.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vdiv_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vdiv.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vdivu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdivu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vdivu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdivu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vdivu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vdivu.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdivu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vdivu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdivu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: 
vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vdivu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdivu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vdivu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vdivu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdivu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vdivu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdivu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vdivu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdivu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vdivu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vdivu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdivu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vdivu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vdivu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, 
e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vdivu.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vdivu.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vdivu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vdivu.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll @@ -1528,16 +1528,15 @@ define @intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmacc.vv v8, v25, v9 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmacc.nxv1i64.i64( @@ -1559,16 +1558,15 @@ define @intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmacc.vv v8, v25, v9, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmacc.mask.nxv1i64.i64( @@ -1590,16 +1588,15 @@ define @intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: 
vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmacc.vv v8, v26, v10 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmacc.nxv2i64.i64( @@ -1621,16 +1618,15 @@ define @intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmacc.vv v8, v26, v10, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmacc.mask.nxv2i64.i64( @@ -1652,16 +1648,15 @@ define @intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmacc.vv v8, v28, v12 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmacc.nxv4i64.i64( @@ -1683,16 +1678,15 @@ define @intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmacc.vv v8, v28, v12, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmacc.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll @@ -809,15 +809,14 @@ define @intrinsic_vmadc_vx_nxv1i1_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmadc_vx_nxv1i1_nxv1i64_i64: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmadc.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.nxv1i64.i64( @@ -836,15 +835,14 @@ define @intrinsic_vmadc_vx_nxv2i1_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmadc_vx_nxv2i1_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmadc.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.nxv2i64.i64( @@ -863,15 +861,14 @@ define @intrinsic_vmadc_vx_nxv4i1_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmadc_vx_nxv4i1_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmadc.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.nxv4i64.i64( @@ -890,15 +887,14 @@ define @intrinsic_vmadc_vx_nxv8i1_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmadc_vx_nxv8i1_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmadc.vv v0, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.nxv8i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll @@ -930,16 +930,15 @@ define @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, 
e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v26, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmadc.vvm v25, v8, v26, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.carry.in.nxv1i64.i64( @@ -960,16 +959,15 @@ define @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmadc.vvm v25, v8, v26, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.carry.in.nxv2i64.i64( @@ -990,16 +988,15 @@ define @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmadc.vvm v25, v8, v28, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.carry.in.nxv4i64.i64( @@ -1020,16 +1017,15 @@ define @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmadc.vvm v25, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadc.carry.in.nxv8i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll @@ -1528,16 +1528,15 @@ define 
@intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmadd.vv v8, v25, v9 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadd.nxv1i64.i64( @@ -1559,16 +1558,15 @@ define @intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmadd.vv v8, v25, v9, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadd.mask.nxv1i64.i64( @@ -1590,16 +1588,15 @@ define @intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmadd.vv v8, v26, v10 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadd.nxv2i64.i64( @@ -1621,16 +1618,15 @@ define @intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmadd.vv v8, v26, v10, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadd.mask.nxv2i64.i64( @@ -1652,16 +1648,15 @@ define @intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64( %0, i64 
%1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmadd.vv v8, v28, v12 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadd.nxv4i64.i64( @@ -1683,16 +1678,15 @@ define @intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmadd.vv v8, v28, v12, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmadd.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vmax_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmax_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmax.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmax.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vmax_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmax.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call 
@llvm.riscv.vmax.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vmax_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmax_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmax.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmax.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vmax_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmax.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmax.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vmax_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmax_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmax.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmax.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vmax_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmax.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmax.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vmax_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vmax_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmax.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vmax_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmax.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vmaxu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmaxu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmaxu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vmaxu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmaxu.vv v8, v9, v25, v0.t +; 
CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmaxu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vmaxu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmaxu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmaxu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vmaxu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmaxu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmaxu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vmaxu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmaxu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmaxu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vmaxu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmaxu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmaxu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define 
@intrinsic_vmaxu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmaxu.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vmaxu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmaxu.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll @@ -890,15 +890,14 @@ define @intrinsic_vmerge_vxm_nxv1i64_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmerge.vvm v8, v8, v25, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmerge.nxv1i64.i64( @@ -919,15 +918,14 @@ define @intrinsic_vmerge_vxm_nxv2i64_nxv2i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero 
; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmerge.nxv2i64.i64( @@ -948,15 +946,14 @@ define @intrinsic_vmerge_vxm_nxv4i64_nxv4i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmerge.nxv4i64.i64( @@ -977,15 +974,14 @@ define @intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmerge.nxv8i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vmin_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmin_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmin.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmin.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vmin_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; 
CHECK-NEXT: vmin.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmin.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vmin_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmin_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmin.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmin.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vmin_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmin.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmin.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vmin_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmin_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmin.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmin.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vmin_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmin.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmin.mask.nxv4i64.i64( @@ 
-1950,15 +1944,14 @@ define @intrinsic_vmin_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmin_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmin.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vmin_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmin.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vminu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vminu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vminu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vminu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vminu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v 
v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vminu.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vminu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vminu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vminu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vminu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vminu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vminu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vminu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vminu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vminu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vminu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vminu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vminu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vminu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vminu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: 
jalr zero, 0(ra) entry: %a = call @llvm.riscv.vminu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vminu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vminu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vminu.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vminu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vminu.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll @@ -809,15 +809,14 @@ define @intrinsic_vmsbc_vx_nxv1i1_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv1i1_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmsbc.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.nxv1i64.i64( @@ -836,15 +835,14 @@ define @intrinsic_vmsbc_vx_nxv2i1_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv2i1_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu 
+; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsbc.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.nxv2i64.i64( @@ -863,15 +861,14 @@ define @intrinsic_vmsbc_vx_nxv4i1_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv4i1_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmsbc.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.nxv4i64.i64( @@ -890,15 +887,14 @@ define @intrinsic_vmsbc_vx_nxv8i1_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsbc_vx_nxv8i1_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmsbc.vv v0, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.nxv8i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll @@ -930,16 +930,15 @@ define @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v26, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsbc.vvm v25, v8, v26, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.borrow.in.nxv1i64.i64( @@ -960,16 +959,15 @@ define @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: 
vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsbc.vvm v25, v8, v26, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.borrow.in.nxv2i64.i64( @@ -990,16 +988,15 @@ define @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmsbc.vvm v25, v8, v28, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.borrow.in.nxv4i64.i64( @@ -1020,16 +1017,15 @@ define @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmsbc.vvm v25, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsbc.borrow.in.nxv8i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll @@ -1650,15 +1650,14 @@ define @intrinsic_vmseq_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmseq.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmseq.nxv1i64.i64( @@ -1679,19 +1678,18 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, 
v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vv v25, v8, v26, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmseq.vv v26, v8, v25, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmseq.mask.nxv1i64.i64( @@ -1712,15 +1710,14 @@ define @intrinsic_vmseq_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmseq.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmseq.nxv2i64.i64( @@ -1741,19 +1738,18 @@ define @intrinsic_vmseq_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmseq.vv v25, v8, v26, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmseq.mask.nxv2i64.i64( @@ -1774,15 +1770,14 @@ define @intrinsic_vmseq_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmseq.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmseq.nxv4i64.i64( @@ -1803,19 +1798,18 @@ define @intrinsic_vmseq_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, 
v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmseq.vv v25, v8, v28, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmseq.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll @@ -714,15 +714,14 @@ define @intrinsic_vmsgt_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmslt.vv v0, v25, v8 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgt.nxv1i64.i64( @@ -743,19 +742,18 @@ define @intrinsic_vmsgt_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmslt.vv v25, v26, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmslt.vv v26, v25, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgt.mask.nxv1i64.i64( @@ -776,15 +774,14 @@ define @intrinsic_vmsgt_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmslt.vv v0, v26, v8 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgt.nxv2i64.i64( @@ -805,19 +802,18 @@ define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmslt.vv v25, v26, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgt.mask.nxv2i64.i64( @@ -838,15 +834,14 @@ define @intrinsic_vmsgt_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmslt.vv v0, v28, v8 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgt.nxv4i64.i64( @@ -867,19 +862,18 @@ define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmslt.vv v25, v28, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgt.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll @@ -714,15 +714,14 @@ define @intrinsic_vmsgtu_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmsltu.vv v0, v25, v8 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) 
entry: %a = call @llvm.riscv.vmsgtu.nxv1i64.i64( @@ -743,19 +742,18 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsltu.vv v25, v26, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmsltu.vv v26, v25, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgtu.mask.nxv1i64.i64( @@ -776,15 +774,14 @@ define @intrinsic_vmsgtu_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsltu.vv v0, v26, v8 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgtu.nxv2i64.i64( @@ -805,19 +802,18 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmsltu.vv v25, v26, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgtu.mask.nxv2i64.i64( @@ -838,15 +834,14 @@ define @intrinsic_vmsgtu_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; 
CHECK-NEXT: vmsltu.vv v0, v28, v8 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgtu.nxv4i64.i64( @@ -867,19 +862,18 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmsltu.vv v25, v28, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsgtu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll @@ -1650,15 +1650,14 @@ define @intrinsic_vmsle_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsle_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmsle.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsle.nxv1i64.i64( @@ -1679,19 +1678,18 @@ define @intrinsic_vmsle_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsle.vv v25, v8, v26, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmsle.vv v26, v8, v25, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsle.mask.nxv1i64.i64( @@ -1712,15 +1710,14 @@ define @intrinsic_vmsle_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsle_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 
-; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsle.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsle.nxv2i64.i64( @@ -1741,19 +1738,18 @@ define @intrinsic_vmsle_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmsle.vv v25, v8, v26, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsle.mask.nxv2i64.i64( @@ -1774,15 +1770,14 @@ define @intrinsic_vmsle_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsle_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmsle.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsle.nxv4i64.i64( @@ -1803,19 +1798,18 @@ define @intrinsic_vmsle_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmsle.vv v25, v8, v28, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsle.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll @@ -1650,15 +1650,14 @@ define @intrinsic_vmsleu_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, 
e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmsleu.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsleu.nxv1i64.i64( @@ -1679,19 +1678,18 @@ define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsleu.vv v25, v8, v26, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmsleu.vv v26, v8, v25, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsleu.mask.nxv1i64.i64( @@ -1712,15 +1710,14 @@ define @intrinsic_vmsleu_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsleu.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsleu.nxv2i64.i64( @@ -1741,19 +1738,18 @@ define @intrinsic_vmsleu_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmsleu.vv v25, v8, v26, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsleu.mask.nxv2i64.i64( @@ -1774,15 +1770,14 @@ define @intrinsic_vmsleu_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; 
CHECK-LABEL: intrinsic_vmsleu_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmsleu.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsleu.nxv4i64.i64( @@ -1803,19 +1798,18 @@ define @intrinsic_vmsleu_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmsleu.vv v25, v8, v28, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsleu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll @@ -1650,15 +1650,14 @@ define @intrinsic_vmslt_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmslt_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmslt.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.nxv1i64.i64( @@ -1679,19 +1678,18 @@ define @intrinsic_vmslt_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmslt.vv v25, v8, v26, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; 
CHECK-NEXT: vmslt.vv v26, v8, v25, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i64.i64( @@ -1712,15 +1710,14 @@ define @intrinsic_vmslt_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmslt_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmslt.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.nxv2i64.i64( @@ -1741,19 +1738,18 @@ define @intrinsic_vmslt_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmslt.vv v25, v8, v26, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i64.i64( @@ -1774,15 +1770,14 @@ define @intrinsic_vmslt_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmslt_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmslt.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.nxv4i64.i64( @@ -1803,19 +1798,18 @@ define @intrinsic_vmslt_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmslt.vv v25, 
v8, v28, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll @@ -1650,15 +1650,14 @@ define @intrinsic_vmsltu_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmsltu.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.nxv1i64.i64( @@ -1679,19 +1678,18 @@ define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsltu.vv v25, v8, v26, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmsltu.vv v26, v8, v25, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i64.i64( @@ -1712,15 +1710,14 @@ define @intrinsic_vmsltu_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsltu.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.nxv2i64.i64( @@ -1741,19 +1738,18 @@ define @intrinsic_vmsltu_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: 
addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmsltu.vv v25, v8, v26, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i64.i64( @@ -1774,15 +1770,14 @@ define @intrinsic_vmsltu_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmsltu.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.nxv4i64.i64( @@ -1803,19 +1798,18 @@ define @intrinsic_vmsltu_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmsltu.vv v25, v8, v28, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll @@ -1650,15 +1650,14 @@ define @intrinsic_vmsne_vx_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsne_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmsne.vv v0, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsne.nxv1i64.i64( @@ -1679,19 +1678,18 @@ define @intrinsic_vmsne_mask_vx_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; 
CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v27, a0 -; CHECK-NEXT: vsll.vx v27, v27, a1 -; CHECK-NEXT: vsrl.vx v27, v27, a1 -; CHECK-NEXT: vor.vv v26, v27, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero +; CHECK-NEXT: vmv1r.v v26, v0 ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsne.vv v25, v8, v26, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmsne.vv v26, v8, v25, v0.t +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsne.mask.nxv1i64.i64( @@ -1712,15 +1710,14 @@ define @intrinsic_vmsne_vx_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsne_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmsne.vv v0, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsne.nxv2i64.i64( @@ -1741,19 +1738,18 @@ define @intrinsic_vmsne_mask_vx_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmsne.vv v25, v8, v26, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsne.mask.nxv2i64.i64( @@ -1774,15 +1770,14 @@ define @intrinsic_vmsne_vx_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmsne_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmsne.vv v0, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsne.nxv4i64.i64( @@ -1803,19 +1798,18 @@ define @intrinsic_vmsne_mask_vx_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; 
CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmsne.vv v25, v8, v28, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsne.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vmul_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmul_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmul.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmul.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vmul_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmul_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmul.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmul.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vmul_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmul_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmul.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmul.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vmul_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmul_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmul.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmul.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vmul_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmul_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmul.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmul.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vmul_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmul_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmul.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmul.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vmul_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmul_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmul.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vmul_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size 
Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vmulh_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulh_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmulh.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulh.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vmulh_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmulh.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulh.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vmulh_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulh_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmulh.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulh.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define 
@intrinsic_vmulh_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmulh.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulh.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vmulh_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulh_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmulh.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulh.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vmulh_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmulh.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulh.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vmulh_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulh_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmulh.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulh.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vmulh_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, 
zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmulh.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vmulhsu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmulhsu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhsu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vmulhsu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmulhsu.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhsu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vmulhsu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmulhsu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 
16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhsu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vmulhsu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmulhsu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhsu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vmulhsu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmulhsu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhsu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vmulhsu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmulhsu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhsu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vmulhsu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhsu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmulhsu.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhsu.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; 
CHECK-LABEL: intrinsic_vmulhsu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmulhsu.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vmulhu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vmulhu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vmulhu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vmulhu.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vmulhu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, 
e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vmulhu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vmulhu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vmulhu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vmulhu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vmulhu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vmulhu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vmulhu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vmulhu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmulhu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vmulhu.vv v8, v8, v16 +; 
CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmulhu.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vmulhu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmulhu.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll @@ -240,17 +240,16 @@ define @intrinsic_vmv.s.x_x_nxv1i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vid.v v26 ; CHECK-NEXT: vmseq.vi v0, v26, 0 ; CHECK-NEXT: vmerge.vvm v8, v8, v25, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv1i64( %0, i64 %1, i32 %2) @@ -262,17 +261,16 @@ define @intrinsic_vmv.s.x_x_nxv2i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vid.v v28 ; CHECK-NEXT: vmseq.vi v0, v28, 0 ; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv2i64( %0, i64 %1, i32 %2) @@ -284,17 +282,16 @@ define @intrinsic_vmv.s.x_x_nxv4i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vmseq.vi v0, v12, 0 ; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv4i64( %0, i64 %1, i32 %2) @@ -306,17 +303,16 @@ define @intrinsic_vmv.s.x_x_nxv8i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv8i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vid.v v24 ; CHECK-NEXT: vmseq.vi v0, v24, 0 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv8i64( %0, i64 %1, i32 %2) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll @@ -332,14 +332,13 @@ define @intrinsic_vmv.v.x_x_nxv1i64(i64 %0, i32 %1) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v8, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v8, (a0), zero +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmv.v.x.nxv1i64( @@ -356,14 +355,13 @@ define @intrinsic_vmv.v.x_x_nxv2i64(i64 %0, i32 %1) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v8, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v8, (a0), zero +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmv.v.x.nxv2i64( @@ -380,14 +378,13 @@ define @intrinsic_vmv.v.x_x_nxv4i64(i64 %0, i32 %1) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vsll.vx v8, v8, a1 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vor.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: 
addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v8, (a0), zero +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmv.v.x.nxv4i64( @@ -404,14 +401,13 @@ define @intrinsic_vmv.v.x_x_nxv8i64(i64 %0, i32 %1) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.x_x_nxv8i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v8, v8, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v8, (a0), zero +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmv.v.x.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll @@ -1528,16 +1528,15 @@ define @intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vnmsac.vv v8, v25, v9 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsac.nxv1i64.i64( @@ -1559,16 +1558,15 @@ define @intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vnmsac.vv v8, v25, v9, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsac.mask.nxv1i64.i64( @@ -1590,16 +1588,15 @@ define @intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: 
vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vnmsac.vv v8, v26, v10 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsac.nxv2i64.i64( @@ -1621,16 +1618,15 @@ define @intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vnmsac.vv v8, v26, v10, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsac.mask.nxv2i64.i64( @@ -1652,16 +1648,15 @@ define @intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vnmsac.vv v8, v28, v12 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsac.nxv4i64.i64( @@ -1683,16 +1678,15 @@ define @intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vnmsac.vv v8, v28, v12, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsac.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll @@ -1528,16 +1528,15 @@ define @intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw 
a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vnmsub.vv v8, v25, v9 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsub.nxv1i64.i64( @@ -1559,16 +1558,15 @@ define @intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vnmsub.vv v8, v25, v9, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsub.mask.nxv1i64.i64( @@ -1590,16 +1588,15 @@ define @intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vnmsub.vv v8, v26, v10 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsub.nxv2i64.i64( @@ -1621,16 +1618,15 @@ define @intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vnmsub.vv v8, v26, v10, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsub.mask.nxv2i64.i64( @@ -1652,16 +1648,15 @@ define @intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 
8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vnmsub.vv v8, v28, v12 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsub.nxv4i64.i64( @@ -1683,16 +1678,15 @@ define @intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64( %0, i64 %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vnmsub.vv v8, v28, v12, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vnmsub.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vor_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vor_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vor.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vor.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vor.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vor.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vor_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vor_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vor.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vor.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vor.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vor.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vor_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vor_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vor.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vor.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vor.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vor.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vor_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vor_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: 
vlse64.v v16, (a0), zero ; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vor.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vrem_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrem_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vrem.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vrem.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vrem_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vrem.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vrem.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vrem_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrem_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; 
CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vrem.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vrem.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vrem_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vrem.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vrem.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vrem_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrem_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vrem.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vrem.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vrem_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vrem.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vrem.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vrem_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrem_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, 
e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v16, (a0), zero ; CHECK-NEXT: vrem.vv v8, v8, v16 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vrem.nxv8i64.i64( @@ -1980,24 +1973,13 @@ ; CHECK-LABEL: intrinsic_vrem_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrrs a3, vlenb, zero -; CHECK-NEXT: sub sp, sp, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v0, v24, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v24, v24, v0 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrem.vv v8, v16, v24, v0.t -; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll @@ -1773,15 +1773,14 @@ define @intrinsic_vremu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vremu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vremu.vv v8, v8, v25 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vremu.nxv1i64.i64( @@ -1802,16 +1801,15 @@ define @intrinsic_vremu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu -; CHECK-NEXT: vmv.v.x v25, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v25, v25, a1 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vsrl.vx v26, v26, a1 -; CHECK-NEXT: vor.vv v25, v26, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu ; CHECK-NEXT: vremu.vv v8, v9, v25, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vremu.mask.nxv1i64.i64( @@ -1832,15 +1830,14 @@ define @intrinsic_vremu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vremu_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; 
CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vremu.vv v8, v8, v26 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vremu.nxv2i64.i64( @@ -1861,16 +1858,15 @@ define @intrinsic_vremu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v26, v26, a1 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vsrl.vx v28, v28, a1 -; CHECK-NEXT: vor.vv v26, v28, v26 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v26, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu ; CHECK-NEXT: vremu.vv v8, v10, v26, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vremu.mask.nxv2i64.i64( @@ -1891,15 +1887,14 @@ define @intrinsic_vremu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vremu_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsll.vx v12, v12, a1 -; CHECK-NEXT: vsrl.vx v12, v12, a1 -; CHECK-NEXT: vor.vv v28, v12, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vremu.vv v8, v8, v28 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vremu.nxv4i64.i64( @@ -1920,16 +1915,15 @@ define @intrinsic_vremu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v28, v28, a1 -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vsrl.vx v16, v16, a1 -; CHECK-NEXT: vor.vv v28, v16, v28 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v28, (a0), zero ; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu ; CHECK-NEXT: vremu.vv v8, v12, v28, v0.t +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vremu.mask.nxv4i64.i64( @@ -1950,15 +1944,14 @@ define @intrinsic_vremu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vremu_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsll.vx v16, v16, a1 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vsll.vx v24, v24, a1 -; CHECK-NEXT: vsrl.vx v24, v24, a1 -; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vremu.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vremu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vremu_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vremu.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
@@ -801,15 +801,14 @@
define @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsub.vv v8, v25, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vrsub.nxv1i64.i64(
@@ -830,16 +829,15 @@
define @intrinsic_vrsub_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vsub.vv v8, v25, v9, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vrsub.mask.nxv1i64.i64(
@@ -860,15 +858,14 @@
define @intrinsic_vrsub_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vrsub_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsub.vv v8, v26, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vrsub.nxv2i64.i64(
@@ -889,16 +886,15 @@
define @intrinsic_vrsub_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vsub.vv v8, v26, v10, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vrsub.mask.nxv2i64.i64(
@@ -919,15 +915,14 @@
define @intrinsic_vrsub_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vrsub_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsub.vv v8, v28, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vrsub.nxv4i64.i64(
@@ -948,16 +943,15 @@
define @intrinsic_vrsub_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vsub.vv v8, v28, v12, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vrsub.mask.nxv4i64.i64(
@@ -978,15 +972,14 @@
define @intrinsic_vrsub_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vrsub_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsub.vv v8, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vrsub.nxv8i64.i64(
@@ -1008,24 +1001,13 @@
; CHECK-LABEL: intrinsic_vrsub_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsub.vv v8, v24, v16, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
@@ -1773,15 +1773,14 @@
define @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsadd.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsadd.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
define @intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vsadd.vv v8, v9, v25, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsadd.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
define @intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsadd.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsadd.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
define @intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vsadd.vv v8, v10, v26, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsadd.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
define @intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsadd.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsadd.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
define @intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vsadd.vv v8, v12, v28, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsadd.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
define @intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsadd.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
@@ -1773,15 +1773,14 @@
define @intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsaddu.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsaddu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
define @intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vsaddu.vv v8, v9, v25, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsaddu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
define @intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsaddu.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsaddu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
define @intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vsaddu.vv v8, v10, v26, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsaddu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
define @intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsaddu.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsaddu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
define @intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vsaddu.vv v8, v12, v28, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsaddu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
define @intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsaddu.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsaddu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
@@ -890,15 +890,14 @@
define @intrinsic_vsbc_vxm_nxv1i64_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vsbc_vxm_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsbc.vvm v8, v8, v25, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsbc.nxv1i64.i64(
@@ -919,15 +918,14 @@
define @intrinsic_vsbc_vxm_nxv2i64_nxv2i64_i64( %0, i64 %1, %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vsbc_vxm_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsbc.vvm v8, v8, v26, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsbc.nxv2i64.i64(
@@ -948,15 +946,14 @@
define @intrinsic_vsbc_vxm_nxv4i64_nxv4i64_i64( %0, i64 %1, %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vsbc_vxm_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsbc.vvm v8, v8, v28, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsbc.nxv4i64.i64(
@@ -977,15 +974,14 @@
define @intrinsic_vsbc_vxm_nxv8i64_nxv8i64_i64( %0, i64 %1, %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vsbc_vxm_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsbc.vvm v8, v8, v16, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsbc.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
@@ -1773,15 +1773,14 @@
define @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsmul.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsmul.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
define @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vsmul.vv v8, v9, v25, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsmul.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
define @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsmul.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsmul.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
define @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vsmul.vv v8, v10, v26, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsmul.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
define @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsmul.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsmul.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
define @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vsmul.vv v8, v12, v28, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsmul.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
define @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsmul.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsmul.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
@@ -1773,15 +1773,14 @@
define @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vssub.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssub.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
define @intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vssub.vv v8, v9, v25, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssub.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
define @intrinsic_vssub_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssub_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vssub.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssub.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
define @intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vssub.vv v8, v10, v26, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssub.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
define @intrinsic_vssub_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssub_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vssub.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssub.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
define @intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vssub.vv v8, v12, v28, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssub.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
define @intrinsic_vssub_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssub_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vssub.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssub.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
@@ -1773,15 +1773,14 @@
define @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vssubu.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssubu.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
define @intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vssubu.vv v8, v9, v25, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssubu.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
define @intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vssubu.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssubu.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
define @intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vssubu.vv v8, v10, v26, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssubu.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
define @intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vssubu.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssubu.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
define @intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vssubu.vv v8, v12, v28, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssubu.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
define @intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vssubu.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vssubu.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
@@ -1773,15 +1773,14 @@
define @intrinsic_vsub_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsub.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
define @intrinsic_vsub_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vsub.vv v8, v9, v25, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsub.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
define @intrinsic_vsub_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsub.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
define @intrinsic_vsub_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vsub.vv v8, v10, v26, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsub.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
define @intrinsic_vsub_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsub.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
define @intrinsic_vsub_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vsub.vv v8, v12, v28, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsub.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
define @intrinsic_vsub_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vsub.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vsub_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsub.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
@@ -1773,15 +1773,14 @@
define @intrinsic_vxor_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vxor_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vxor.nxv1i64.i64(
@@ -1802,16 +1801,15 @@
define @intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vxor.vv v8, v9, v25, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vxor.mask.nxv1i64.i64(
@@ -1832,15 +1830,14 @@
define @intrinsic_vxor_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vxor_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vxor.nxv2i64.i64(
@@ -1861,16 +1858,15 @@
define @intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vxor.vv v8, v10, v26, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vxor.mask.nxv2i64.i64(
@@ -1891,15 +1887,14 @@
define @intrinsic_vxor_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vxor_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vxor.nxv4i64.i64(
@@ -1920,16 +1915,15 @@
define @intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vsrl.vx v16, v16, a1
-; CHECK-NEXT: vor.vv v28, v16, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vxor.vv v8, v12, v28, v0.t
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vxor.mask.nxv4i64.i64(
@@ -1950,15 +1944,14 @@
define @intrinsic_vxor_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vxor_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call @llvm.riscv.vxor.nxv8i64.i64(
@@ -1980,24 +1973,13 @@
; CHECK-LABEL: intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrrs a3, vlenb, zero
-; CHECK-NEXT: sub sp, sp, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, a2, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v0, v24, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v24, v24, v0
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v24, (a0), zero
; CHECK-NEXT: vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1re8.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vxor.vv v8, v16, v24, v0.t
-; CHECK-NEXT: csrrs a0, vlenb, zero
-; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: jalr zero, 0(ra)
entry: