diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -36,6 +36,7 @@
     return SelectionDAGISel::runOnMachineFunction(MF);
   }
 
+  void PreprocessISelDAG() override;
   void PostprocessISelDAG() override;
 
   void Select(SDNode *Node) override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
 #include "MCTargetDesc/RISCVMatInt.h"
 #include "RISCVISelLowering.h"
+#include "RISCVMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/Support/Alignment.h"
@@ -40,6 +41,72 @@
 } // namespace RISCV
 } // namespace llvm
 
+void RISCVDAGToDAGISel::PreprocessISelDAG() {
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+                                       E = CurDAG->allnodes_end();
+       I != E;) {
+    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+
+    // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
+    // load. Done after lowering and combining so that we have a chance to
+    // optimize this to VMV_V_X_VL when the upper bits aren't needed.
+    if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
+      continue;
+
+    assert(N->getNumOperands() == 3 && "Unexpected number of operands");
+    MVT VT = N->getSimpleValueType(0);
+    SDValue Lo = N->getOperand(0);
+    SDValue Hi = N->getOperand(1);
+    SDValue VL = N->getOperand(2);
+    assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
+           Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
+           "Unexpected VTs!");
+    MachineFunction &MF = CurDAG->getMachineFunction();
+    RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
+    SDLoc DL(N);
+
+    // We use the same frame index we use for moving two i32s into 64-bit FPR.
+    // This is an analogous operation.
+    int FI = FuncInfo->getMoveF64FrameIndex(MF);
+    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
+    SDValue StackSlot =
+        CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
+
+    SDValue Chain = CurDAG->getEntryNode();
+    Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
+
+    SDValue OffsetSlot =
+        CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+    Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
+                          Align(8));
+
+    Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+
+    SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
+    SDValue IntID =
+        CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
+    SDValue Ops[] = {Chain, IntID, StackSlot,
+                     CurDAG->getRegister(RISCV::X0, MVT::i64), VL};
+
+    SDValue Result = CurDAG->getMemIntrinsicNode(
+        ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
+        MachineMemOperand::MOLoad);
+
+    // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
+    // vlse we created. This will cause general havoc on the dag because
+    // anything below the conversion could be folded into other existing nodes.
+    // To avoid invalidating 'I', back it up to the convert node.
+    --I;
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+    // Now that we did that, the node is dead. Increment the iterator to the
+    // next node to process, then delete N.
+    ++I;
+    CurDAG->DeleteNode(N);
+  }
+}
+
 void RISCVDAGToDAGISel::PostprocessISelDAG() {
   doPeepholeLoadStoreADDI();
 }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -124,6 +124,9 @@
   // Splats an i64 scalar to a vector type (with element type i64) where the
   // scalar is a sign-extended i32.
   SPLAT_VECTOR_I64,
+  // Splats a 64-bit value that has been split into two i32 parts. This is
+  // expanded late to two scalar stores and a stride 0 vector load.
+  SPLAT_VECTOR_SPLIT_I64_VL,
   // Read VLENB CSR
   READ_VLENB,
   // Truncates a RVV integer vector by one power-of-two. Carries both an extra
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -817,6 +817,9 @@
     setTargetDAGCombine(ISD::FCOPYSIGN);
     setTargetDAGCombine(ISD::MGATHER);
     setTargetDAGCombine(ISD::MSCATTER);
+    setTargetDAGCombine(ISD::SRA);
+    setTargetDAGCombine(ISD::SRL);
+    setTargetDAGCombine(ISD::SHL);
   }
 }
@@ -1611,43 +1614,6 @@
   return SDValue();
 }
 
-// Use a stack slot to splat the two i32 values in Lo/Hi to the vector desired
-// vector nxvXi64 VT.
-static SDValue splatPartsI64ThroughStack(const SDLoc &DL, MVT VT, SDValue Lo,
-                                         SDValue Hi, SDValue VL,
-                                         SelectionDAG &DAG) {
-  assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
-         Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
-         "Unexpected VTs!");
-  MachineFunction &MF = DAG.getMachineFunction();
-  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
-
-  // We use the same frame index we use for moving two i32s into 64-bit FPR.
-  // This is an analogous operation.
-  int FI = FuncInfo->getMoveF64FrameIndex(MF);
-  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  SDValue StackSlot =
-      DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()));
-
-  SDValue Chain = DAG.getEntryNode();
-  Lo = DAG.getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
-
-  SDValue OffsetSlot =
-      DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
-  Hi = DAG.getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), Align(8));
-
-  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
-
-  SDVTList VTs = DAG.getVTList({VT, MVT::Other});
-  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
-  SDValue Ops[] = {Chain, IntID, StackSlot,
-                   DAG.getRegister(RISCV::X0, MVT::i64), VL};
-
-  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64,
-                                 MPI, Align(8), MachineMemOperand::MOLoad);
-}
-
 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
@@ -1660,7 +1626,7 @@
   }
 
   // Fall back to a stack store and stride x0 vector load.
-  return splatPartsI64ThroughStack(DL, VT, Lo, Hi, VL, DAG);
+  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
 }
 
 // Called by type legalization to handle splat of i64 on RV32.
@@ -2969,8 +2935,8 @@
     return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
 
   // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
- return splatPartsI64ThroughStack(DL, VecVT, Lo, Hi, - DAG.getRegister(RISCV::X0, MVT::i64), DAG); + return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi, + DAG.getRegister(RISCV::X0, MVT::i64)); } // Custom-lower extensions from mask vectors by using a vselect either with 1 @@ -5925,6 +5891,36 @@ Index, MGSN->getScale()}, MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); } + case RISCVISD::SRA_VL: + case RISCVISD::SRL_VL: + case RISCVISD::SHL_VL: { + SDValue ShAmt = N->getOperand(1); + if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { + // We don't need the upper 32 bits of a 64-bit element for a shift amount. + SDLoc DL(N); + SDValue VL = N->getOperand(3); + EVT VT = N->getValueType(0); + ShAmt = + DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL); + return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, + N->getOperand(2), N->getOperand(3)); + } + break; + } + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + SDValue ShAmt = N->getOperand(1); + if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { + // We don't need the upper 32 bits of a 64-bit element for a shift amount. + SDLoc DL(N); + EVT VT = N->getValueType(0); + ShAmt = + DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0)); + return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); + } + break; + } } return SDValue(); @@ -7839,6 +7835,7 @@ NODE_NAME_CASE(VMV_S_X_VL) NODE_NAME_CASE(VFMV_S_F_VL) NODE_NAME_CASE(SPLAT_VECTOR_I64) + NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) NODE_NAME_CASE(READ_VLENB) NODE_NAME_CASE(TRUNCATE_VECTOR_VL) NODE_NAME_CASE(VSLIDEUP_VL) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) { ; CHECK-LABEL: vnsra_v8i16_v8i8_scalar: @@ -31,27 +31,12 @@ } define <2 x i32> @vnsra_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) { -; RV32-LABEL: vnsra_v2i64_v2i32_scalar: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsra.vv v25, v8, v25 -; RV32-NEXT: vsetivli a0, 2, e32,mf2,ta,mu -; RV32-NEXT: vnsrl.wi v8, v25, 0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: vnsra_v2i64_v2i32_scalar: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e32,mf2,ta,mu -; RV64-NEXT: vnsra.wx v25, v8, a0 -; RV64-NEXT: vmv1r.v v8, v25 -; RV64-NEXT: ret +; CHECK-LABEL: vnsra_v2i64_v2i32_scalar: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; 
CHECK-NEXT: vnsra.wx v25, v8, a0 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret %insert = insertelement <2 x i64> undef, i64 %y, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer %a = ashr <2 x i64> %x, %splat @@ -124,27 +109,12 @@ } define <2 x i32> @vnsrl_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) { -; RV32-LABEL: vnsrl_v2i64_v2i32_scalar: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsrl.vv v25, v8, v25 -; RV32-NEXT: vsetivli a0, 2, e32,mf2,ta,mu -; RV32-NEXT: vnsrl.wi v8, v25, 0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: vnsrl_v2i64_v2i32_scalar: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e32,mf2,ta,mu -; RV64-NEXT: vnsrl.wx v25, v8, a0 -; RV64-NEXT: vmv1r.v v8, v25 -; RV64-NEXT: ret +; CHECK-LABEL: vnsrl_v2i64_v2i32_scalar: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; CHECK-NEXT: vnsrl.wx v25, v8, a0 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret %insert = insertelement <2 x i64> undef, i64 %y, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer %a = lshr <2 x i64> %x, %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll @@ -943,16 +943,8 @@ define <2 x i64> @vsll_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v25, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v2i64: @@ -969,16 +961,8 @@ define <2 x i64> @vsll_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v25 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v2i64_unmasked: @@ -1047,16 +1031,8 @@ define <4 x i64> @vsll_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v26, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v4i64: @@ -1073,16 +1049,8 @@ define <4 x i64> 
@vsll_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v26 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v4i64_unmasked: @@ -1151,16 +1119,8 @@ define <8 x i64> @vsll_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v28, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v8i64: @@ -1177,16 +1137,8 @@ define <8 x i64> @vsll_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v8i64_unmasked: @@ -1255,16 +1207,8 @@ define <16 x i64> @vsll_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v16, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v16i64: @@ -1281,16 +1225,8 @@ define <16 x i64> @vsll_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_v16i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll @@ -943,16 +943,8 @@ define <2 x i64> @vsra_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v2i64: ; RV32: # %bb.0: -; 
RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v25, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v2i64: @@ -969,16 +961,8 @@ define <2 x i64> @vsra_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v25 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v2i64_unmasked: @@ -1047,16 +1031,8 @@ define <4 x i64> @vsra_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v26, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v4i64: @@ -1073,16 +1049,8 @@ define <4 x i64> @vsra_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v26 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v4i64_unmasked: @@ -1151,16 +1119,8 @@ define <8 x i64> @vsra_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v28, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v8i64: @@ -1177,16 +1137,8 @@ define <8 x i64> @vsra_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, 
e64,m4,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v8i64_unmasked: @@ -1255,16 +1207,8 @@ define <16 x i64> @vsra_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v16, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v16i64: @@ -1281,16 +1225,8 @@ define <16 x i64> @vsra_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_v16i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll @@ -943,16 +943,8 @@ define <2 x i64> @vsrl_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v25, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v2i64: @@ -969,16 +961,8 @@ define <2 x i64> @vsrl_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v25 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v2i64_unmasked: @@ -1047,16 +1031,8 @@ define <4 x i64> @vsrl_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v26, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v4i64: @@ -1073,16 +1049,8 @@ define 
<4 x i64> @vsrl_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v26 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v4i64_unmasked: @@ -1151,16 +1119,8 @@ define <8 x i64> @vsrl_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v28, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v8i64: @@ -1177,16 +1137,8 @@ define <8 x i64> @vsrl_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v8i64_unmasked: @@ -1255,16 +1207,8 @@ define <16 x i64> @vsrl_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v16, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v16i64: @@ -1281,16 +1225,8 @@ define <16 x i64> @vsrl_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_v16i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode-rv32.ll @@ -436,15 +436,8 @@ define @vshl_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vshl_vx_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: 
.cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v25, (a0), zero -; CHECK-NEXT: vsll.vv v8, v8, v25 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -480,15 +473,8 @@ define @vshl_vx_nxv2i64( %va, i64 %b) { ; CHECK-LABEL: vshl_vx_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v26, (a0), zero -; CHECK-NEXT: vsll.vv v8, v8, v26 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -524,15 +510,8 @@ define @vshl_vx_nxv4i64( %va, i64 %b) { ; CHECK-LABEL: vshl_vx_nxv4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v28, (a0), zero -; CHECK-NEXT: vsll.vv v8, v8, v28 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -568,15 +547,8 @@ define @vshl_vx_nxv8i64( %va, i64 %b) { ; CHECK-LABEL: vshl_vx_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsll.vv v8, v8, v16 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll @@ -1399,16 +1399,8 @@ define @vsll_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v25, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv1i64: @@ -1425,16 +1417,8 @@ define @vsll_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v25 
-; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv1i64_unmasked: @@ -1503,16 +1487,8 @@ define @vsll_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v26, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv2i64: @@ -1529,16 +1505,8 @@ define @vsll_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v26 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv2i64_unmasked: @@ -1607,16 +1575,8 @@ define @vsll_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v28, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv4i64: @@ -1633,16 +1593,8 @@ define @vsll_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv4i64_unmasked: @@ -1711,16 +1663,8 @@ define @vsll_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v16, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv8i64: @@ -1737,16 +1681,8 @@ define @vsll_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsll_vx_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, 
zero, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsll.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsll.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsll_vx_nxv8i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode-rv32.ll @@ -626,15 +626,8 @@ define @vsra_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vsra_vx_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v25, (a0), zero -; CHECK-NEXT: vsra.vv v8, v8, v25 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -680,15 +673,8 @@ define @vsra_vx_nxv2i64( %va, i64 %b) { ; CHECK-LABEL: vsra_vx_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v26, (a0), zero -; CHECK-NEXT: vsra.vv v8, v8, v26 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -734,15 +720,8 @@ define @vsra_vx_nxv4i64( %va, i64 %b) { ; CHECK-LABEL: vsra_vx_nxv4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v28, (a0), zero -; CHECK-NEXT: vsra.vv v8, v8, v28 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -788,15 +767,8 @@ define @vsra_vx_nxv8i64( %va, i64 %b) { ; CHECK-LABEL: vsra_vx_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsra.vv v8, v8, v16 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll @@ -1399,16 +1399,8 @@ define @vsra_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), 
zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v25, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv1i64: @@ -1425,16 +1417,8 @@ define @vsra_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v25 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv1i64_unmasked: @@ -1503,16 +1487,8 @@ define @vsra_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v26, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv2i64: @@ -1529,16 +1505,8 @@ define @vsra_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v26 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv2i64_unmasked: @@ -1607,16 +1575,8 @@ define @vsra_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v28, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv4i64: @@ -1633,16 +1593,8 @@ define @vsra_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv4i64_unmasked: @@ -1711,16 +1663,8 @@ define @vsra_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 
-; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v16, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv8i64: @@ -1737,16 +1681,8 @@ define @vsra_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsra_vx_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsra.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsra_vx_nxv8i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode-rv32.ll @@ -436,15 +436,8 @@ define @vsrl_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vsrl_vx_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v25, (a0), zero -; CHECK-NEXT: vsrl.vv v8, v8, v25 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -480,15 +473,8 @@ define @vsrl_vx_nxv2i64( %va, i64 %b) { ; CHECK-LABEL: vsrl_vx_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v26, (a0), zero -; CHECK-NEXT: vsrl.vv v8, v8, v26 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -524,15 +510,8 @@ define @vsrl_vx_nxv4i64( %va, i64 %b) { ; CHECK-LABEL: vsrl_vx_nxv4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v28, (a0), zero -; CHECK-NEXT: vsrl.vv v8, v8, v28 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -568,15 +547,8 @@ define @vsrl_vx_nxv8i64( %va, i64 %b) { ; CHECK-LABEL: vsrl_vx_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsrl.vv v8, v8, v16 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, 
e64,m8,ta,mu +; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll @@ -1399,16 +1399,8 @@ define @vsrl_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v25, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv1i64: @@ -1425,16 +1417,8 @@ define @vsrl_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v25 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv1i64_unmasked: @@ -1503,16 +1487,8 @@ define @vsrl_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v26, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv2i64: @@ -1529,16 +1505,8 @@ define @vsrl_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v26 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m2,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv2i64_unmasked: @@ -1607,16 +1575,8 @@ define @vsrl_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v28, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv4i64: @@ -1633,16 +1593,8 @@ define @vsrl_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: 
vsrl_vx_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m4,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv4i64_unmasked: @@ -1711,16 +1663,8 @@ define @vsrl_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v16, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv8i64: @@ -1737,16 +1681,8 @@ define @vsrl_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vsrl_vx_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu -; RV32-NEXT: vsrl.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli a1, a2, e64,m8,ta,mu +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: vsrl_vx_nxv8i64_unmasked:
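For reference, the RV32 pattern affected by the new SRA/SRL/SHL combines is the splat-of-an-i64-shift-amount idiom exercised by the tests above. A minimal sketch in LLVM IR (the function name is illustrative; the body mirrors vsra_vx_nxv1i64 from vsra-sdnode-rv32.ll):

define <vscale x 1 x i64> @example_vsra_vx(<vscale x 1 x i64> %va, i64 %b) {
  ; Splat the scalar i64 shift amount into every element, then shift.
  %head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
  %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
  %v = ashr <vscale x 1 x i64> %va, %splat
  ret <vscale x 1 x i64> %v
}

Before this change, RV32 selected the stack-slot sequence shown in the removed CHECK lines (two sw stores, a stride-0 vlse64.v load, then vsra.vv); since a shift of a 64-bit element never needs the upper 32 bits of the amount, the combine rewrites the splat so selection emits vsra.vx on the scalar low half directly, matching the updated CHECK lines.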