diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1413,6 +1413,18 @@
     return false;
   }

+  // Return true if the target supports a scatter/gather instruction with
+  // indices which are scaled by the particular value. Note that all targets
+  // must by definition support scale of 1.
+  virtual bool isLegalScaleForGatherScatter(uint64_t Scale,
+                                            uint64_t ElemSize) const {
+    // MGATHER/MSCATTER are only required to support scaling by one or by the
+    // element size.
+    if (Scale != ElemSize && Scale != 1)
+      return false;
+    return true;
+  }
+
   /// Return how the condition code should be treated: either it is legal, needs
   /// to be expanded to some other code sequence, or the target has a custom
   /// expander for it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -514,6 +514,8 @@
     SDValue visitMSTORE(SDNode *N);
     SDValue visitMGATHER(SDNode *N);
     SDValue visitMSCATTER(SDNode *N);
+    SDValue visitVPGATHER(SDNode *N);
+    SDValue visitVPSCATTER(SDNode *N);
     SDValue visitFP_TO_FP16(SDNode *N);
     SDValue visitFP16_TO_FP(SDNode *N);
     SDValue visitFP_TO_BF16(SDNode *N);
@@ -10721,6 +10723,37 @@
   return false;
 }

+SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
+  VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
+  SDValue Mask = MSC->getMask();
+  SDValue Chain = MSC->getChain();
+  SDValue Index = MSC->getIndex();
+  SDValue Scale = MSC->getScale();
+  SDValue StoreVal = MSC->getValue();
+  SDValue BasePtr = MSC->getBasePtr();
+  SDValue VL = MSC->getVectorLength();
+  ISD::MemIndexType IndexType = MSC->getIndexType();
+  SDLoc DL(N);
+
+  // Zap scatters with a zero mask.
+  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+    return Chain;
+
+  if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) {
+    SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+    return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+                            DL, Ops, MSC->getMemOperand(), IndexType);
+  }
+
+  if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
+    SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+    return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+                            DL, Ops, MSC->getMemOperand(), IndexType);
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
@@ -10817,6 +10850,34 @@
   return SDValue();
 }

+SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
+  VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
+  SDValue Mask = MGT->getMask();
+  SDValue Chain = MGT->getChain();
+  SDValue Index = MGT->getIndex();
+  SDValue Scale = MGT->getScale();
+  SDValue BasePtr = MGT->getBasePtr();
+  SDValue VL = MGT->getVectorLength();
+  ISD::MemIndexType IndexType = MGT->getIndexType();
+  SDLoc DL(N);
+
+  if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) {
+    SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+    return DAG.getGatherVP(
+        DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+        Ops, MGT->getMemOperand(), IndexType);
+  }
+
+  if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
+    SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+    return DAG.getGatherVP(
+        DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+        Ops, MGT->getMemOperand(), IndexType);
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
   SDValue Mask = MGT->getMask();
@@ -23571,6 +23632,15 @@
 }

 SDValue DAGCombiner::visitVPOp(SDNode *N) {
+
+  if (N->getOpcode() == ISD::VP_GATHER)
+    if (SDValue SD = visitVPGATHER(N))
+      return SD;
+
+  if (N->getOpcode() == ISD::VP_SCATTER)
+    if (SDValue SD = visitVPSCATTER(N))
+      return SD;
+
   // VP operations in which all vector elements are disabled - either by
   // determining that the mask is all false or that the EVL is 0 - can be
   // eliminated.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4435,17 +4435,17 @@
   if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
     return false;

+  uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+
+  // Target may not support the required addressing mode.
+  if (ScaleVal != 1 &&
+      !TLI.isLegalScaleForGatherScatter(ScaleVal, ElemSize))
+    return false;
+
   Base = SDB->getValue(BasePtr);
   Index = SDB->getValue(IndexVal);
   IndexType = ISD::SIGNED_SCALED;

-  // MGATHER/MSCATTER are only required to support scaling by one or by the
-  // element size. Other scales may be produced using target-specific DAG
-  // combines.
-  uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
-  if (ScaleVal != ElemSize && ScaleVal != 1)
-    return false;
-
   Scale = DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(),
                                 TLI.getPointerTy(DL));
   return true;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -580,6 +580,12 @@
   bool isVScaleKnownToBeAPowerOfTwo() const override;

+  bool isLegalScaleForGatherScatter(uint64_t Scale,
+                                    uint64_t ElemSize) const override {
+    // Scaled addressing not supported on indexed load/stores
+    return Scale == 1;
+  }
+
 private:
   /// RISCVCCAssignFn - This target-specific function extends the default
   /// CCValAssign with additional information used to lower RISC-V calling
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9020,26 +9020,27 @@
     if (!DCI.isBeforeLegalize())
       break;
     SDValue Index, ScaleOp;
-    bool IsIndexScaled = false;
     bool IsIndexSigned = false;
     if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
       Index = VPGSN->getIndex();
       ScaleOp = VPGSN->getScale();
-      IsIndexScaled = VPGSN->isIndexScaled();
       IsIndexSigned = VPGSN->isIndexSigned();
+      assert(!VPGSN->isIndexScaled() &&
+             "Scaled gather/scatter should not be formed");
     } else {
       const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
       Index = MGSN->getIndex();
       ScaleOp = MGSN->getScale();
-      IsIndexScaled = MGSN->isIndexScaled();
       IsIndexSigned = MGSN->isIndexSigned();
+      assert(!MGSN->isIndexScaled() &&
+             "Scaled gather/scatter should not be formed");
     }
     EVT IndexVT = Index.getValueType();
     MVT XLenVT = Subtarget.getXLenVT();
     // RISCV indexed loads only support the "unsigned unscaled" addressing
     // mode, so anything else must be manually legalized.
     bool NeedsIdxLegalization =
-        IsIndexScaled ||
         (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
     if (!NeedsIdxLegalization)
       break;
@@ -9056,17 +9057,6 @@
                          DL, IndexVT, Index);
     }

-    if (IsIndexScaled) {
-      // Manually scale the indices.
-      // TODO: Sanitize the scale operand here?
-      // TODO: For VP nodes, should we use VP_SHL here?
-      unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
-      assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
-      SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
-      Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
-      ScaleOp = DAG.getTargetConstant(1, DL, ScaleOp.getValueType());
-    }
-
     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
     if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -4591,13 +4591,11 @@
 define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
 ; RV32V:       # %bb.0:
-; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32V-NEXT:    vsext.vf8 v16, v8
-; RV32V-NEXT:    vsll.vi v8, v16, 3
-; RV32V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32V-NEXT:    vnsrl.wi v16, v8, 0
+; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV32V-NEXT:    vsext.vf4 v10, v8
+; RV32V-NEXT:    vsll.vi v8, v10, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32V-NEXT:    vluxei32.v v12, (a0), v16, v0.t
+; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
 ; RV32V-NEXT:    vmv.v.v v8, v12
 ; RV32V-NEXT:    ret
 ;
@@ -4873,13 +4871,11 @@
 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
 ; RV32V:       # %bb.0:
-; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32V-NEXT:    vzext.vf8 v16, v8
-; RV32V-NEXT:    vsll.vi v8, v16, 3
-; RV32V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32V-NEXT:    vnsrl.wi v16, v8, 0
+; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV32V-NEXT:    vzext.vf4 v10, v8
+; RV32V-NEXT:    vsll.vi v8, v10, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32V-NEXT:    vluxei32.v v12, (a0), v16, v0.t
+; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
 ; RV32V-NEXT:    vmv.v.v v8, v12
 ; RV32V-NEXT:    ret
 ;
@@ -5443,13 +5439,11 @@
 define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
 ; RV32V:       # %bb.0:
-; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32V-NEXT:    vsext.vf4 v16, v8
-; RV32V-NEXT:    vsll.vi v8, v16, 3
-; RV32V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32V-NEXT:    vnsrl.wi v16, v8, 0
+; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV32V-NEXT:    vsext.vf2 v10, v8
+; RV32V-NEXT:    vsll.vi v8, v10, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32V-NEXT:    vluxei32.v v12, (a0), v16, v0.t
+; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
 ; RV32V-NEXT:    vmv.v.v v8, v12
 ; RV32V-NEXT:    ret
 ;
@@ -5726,13 +5720,11 @@
 define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
 ; RV32V:       # %bb.0:
-; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV32V-NEXT:    vzext.vf4 v16, v8
-; RV32V-NEXT:    vsll.vi v8, v16, 3
-; RV32V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32V-NEXT:    vnsrl.wi v16, v8, 0
+; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; RV32V-NEXT:    vzext.vf2 v10, v8
+; RV32V-NEXT:    vsll.vi v8, v10, 3
 ; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32V-NEXT:    vluxei32.v v12, (a0), v16, v0.t
+; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -6297,13 +6289,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf2 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v8, v8, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -6579,13 +6568,10 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf2 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v8, v8, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -6869,12 +6855,11 @@ define <8 x i64> @mgather_baseidx_v8i64(i64* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsll.vi v8, v8, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsll.vi v8, v16, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -10360,13 +10345,11 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf8 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsext.vf4 v10, v8 +; RV32V-NEXT: vsll.vi v8, v10, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -10581,13 +10564,11 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf8 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vzext.vf4 v10, v8 +; RV32V-NEXT: vsll.vi v8, v10, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -11029,13 +11010,11 @@ define <8 x double> 
@mgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf4 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsext.vf2 v10, v8 +; RV32V-NEXT: vsll.vi v8, v10, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -11251,13 +11230,11 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf4 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vzext.vf2 v10, v8 +; RV32V-NEXT: vsll.vi v8, v10, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -11702,13 +11679,10 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf2 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v8, v8, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -11925,13 +11899,10 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf2 v16, v8 -; RV32V-NEXT: vsll.vi v8, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v8, v8, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -12156,12 +12127,11 @@ define <8 x double> @mgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsll.vi v8, v8, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32V-NEXT: vnsrl.wi v16, v8, 0 +; RV32V-NEXT: vsll.vi v8, v16, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v16, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -3753,13 +3753,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf8 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsext.vf4 v14, v12 +; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i64: @@ -4003,13 +4001,11 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf8 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vzext.vf4 v14, v12 +; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64: @@ -4509,13 +4505,11 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf4 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsext.vf2 v14, v12 +; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i64: @@ -4760,13 +4754,11 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf4 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vzext.vf2 v14, v12 +; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64: @@ -5269,13 +5261,10 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf2 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, 
e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v12, v12, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8i64: @@ -5521,13 +5510,10 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf2 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v12, v12, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8i64: @@ -5781,12 +5767,11 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsll.vi v12, v12, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsll.vi v12, v16, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_v8i64: @@ -8878,13 +8863,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf8 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsext.vf4 v14, v12 +; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f64: @@ -9084,13 +9067,11 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf8 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vzext.vf4 v14, v12 +; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64: @@ -9502,13 +9483,11 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf4 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsext.vf2 v14, v12 
+; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f64: @@ -9709,13 +9688,11 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf4 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vzext.vf2 v14, v12 +; RV32V-NEXT: vsll.vi v12, v14, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64: @@ -10130,13 +10107,10 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsext.vf2 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v12, v12, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8f64: @@ -10338,13 +10312,10 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vzext.vf2 v16, v12 -; RV32V-NEXT: vsll.vi v12, v16, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32V-NEXT: vsll.vi v12, v12, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8f64: @@ -10554,12 +10525,11 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32V-NEXT: vsll.vi v12, v12, 3 -; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32V-NEXT: vnsrl.wi v16, v12, 0 +; RV32V-NEXT: vsll.vi v12, v16, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_v8f64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -961,12 +961,10 @@ define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf8 
v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf4 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -987,12 +985,10 @@ define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf8 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf4 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1036,12 +1032,10 @@ define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1062,12 +1056,10 @@ define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1110,12 +1102,9 @@ define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsll.vi v12, v8, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1136,12 +1125,9 @@ define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; 
RV32-NEXT: vsll.vi v12, v8, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1162,11 +1148,10 @@ define <8 x i64> @vpgather_baseidx_v8i64(i64* %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsll.vi v12, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1698,12 +1683,10 @@ define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf8 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf4 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1724,12 +1707,10 @@ define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf8 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf4 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1773,12 +1754,10 @@ define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1799,12 +1778,10 @@ define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1847,12 +1824,9 @@ define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 
x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsll.vi v12, v8, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1873,12 +1847,9 @@ define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsll.vi v12, v8, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1899,11 +1870,10 @@ define <8 x double> @vpgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vsll.vi v12, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; @@ -1977,16 +1947,16 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vsext.vf4 v16, v8 ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB87_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB87_2: -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu -; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a3, a1, -16 @@ -2005,20 +1975,22 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: li a2, 0 +; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu +; RV64-NEXT: vslidedown.vi v12, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: addi a3, a1, -16 +; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: bltu a1, a3, .LBB87_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB87_2: +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v10, 2 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu -; RV64-NEXT: vslidedown.vi v12, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2026,9 +1998,6 @@ ; RV64-NEXT: # %bb.3: ; 
RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB87_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2041,37 +2010,30 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v10, v0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: vsetivli zero, 16, e8, m2, ta, mu -; RV32-NEXT: vslidedown.vi v12, v8, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vsext.vf8 v16, v12 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vsext.vf4 v16, v8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB88_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a3 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB88_2: -; RV32-NEXT: vsext.vf8 v24, v8 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v12, v16, 0 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV32-NEXT: vslidedown.vi v0, v10, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB88_4 +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: addi a3, a1, -16 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: bltu a1, a3, .LBB88_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a1, 16 +; RV32-NEXT: mv a2, a3 ; RV32-NEXT: .LBB88_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v4, v24, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64: @@ -2081,17 +2043,17 @@ ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu ; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: addi a3, a1, -16 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: bltu a1, a3, .LBB88_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB88_2: -; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v10, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2099,8 +2061,6 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB88_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2114,37 +2074,30 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: 
vpgather_baseidx_zext_v32i8_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v10, v0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: vsetivli zero, 16, e8, m2, ta, mu -; RV32-NEXT: vslidedown.vi v12, v8, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vzext.vf8 v16, v12 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vzext.vf4 v16, v8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB89_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a3 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB89_2: -; RV32-NEXT: vzext.vf8 v24, v8 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v12, v16, 0 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV32-NEXT: vslidedown.vi v0, v10, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB89_4 +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: addi a3, a1, -16 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: bltu a1, a3, .LBB89_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a1, 16 +; RV32-NEXT: mv a2, a3 ; RV32-NEXT: .LBB89_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v4, v24, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: @@ -2154,17 +2107,17 @@ ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu ; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vzext.vf8 v16, v12 +; RV64-NEXT: vzext.vf8 v24, v8 +; RV64-NEXT: addi a3, a1, -16 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: bltu a1, a3, .LBB89_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB89_2: -; RV64-NEXT: vzext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v10, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2172,8 +2125,6 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB89_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2187,16 +2138,16 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB90_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB90_2: -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v16, 
v16, 3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a3, a1, -16 @@ -2215,20 +2166,22 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: li a2, 0 +; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, mu +; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: addi a3, a1, -16 +; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: bltu a1, a3, .LBB90_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB90_2: +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v12, 2 -; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, mu -; RV64-NEXT: vslidedown.vi v16, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf4 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2236,9 +2189,6 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB90_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2251,37 +2201,30 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, mu -; RV32-NEXT: vslidedown.vi v24, v8, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vsext.vf4 v16, v24 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vsext.vf2 v16, v8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB91_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a3 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB91_2: -; RV32-NEXT: vsext.vf4 v24, v8 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v8, v16, 0 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV32-NEXT: vslidedown.vi v0, v12, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB91_4 +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: addi a3, a1, -16 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: bltu a1, a3, .LBB91_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a1, 16 +; RV32-NEXT: mv a2, a3 ; RV32-NEXT: .LBB91_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v4, v24, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: @@ -2289,19 +2232,19 @@ ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: li a2, 0 ; RV64-NEXT: 
vsetivli zero, 16, e16, m4, ta, mu -; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v0, v16 +; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vsext.vf4 v16, v24 +; RV64-NEXT: vsll.vi v16, v0, 3 ; RV64-NEXT: bltu a1, a3, .LBB91_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB91_2: -; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v12, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2309,8 +2252,6 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB91_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2324,37 +2265,30 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, mu -; RV32-NEXT: vslidedown.vi v24, v8, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vzext.vf4 v16, v24 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vzext.vf2 v16, v8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB92_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a3 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB92_2: -; RV32-NEXT: vzext.vf4 v24, v8 -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v8, v16, 0 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV32-NEXT: vslidedown.vi v0, v12, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB92_4 +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: addi a3, a1, -16 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: bltu a1, a3, .LBB92_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a1, 16 +; RV32-NEXT: mv a2, a3 ; RV32-NEXT: .LBB92_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v4, v24, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: @@ -2362,19 +2296,19 @@ ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, mu -; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vzext.vf4 v0, v16 +; RV64-NEXT: vzext.vf4 v24, v8 ; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vzext.vf4 v16, v24 +; RV64-NEXT: vsll.vi v16, v0, 3 ; RV64-NEXT: bltu a1, a3, .LBB92_2 ; 
RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB92_2: -; RV64-NEXT: vzext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v12, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2382,8 +2316,6 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB92_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2397,15 +2329,15 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB93_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB93_2: -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a3, a1, -16 @@ -2424,20 +2356,22 @@ ; ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vmv1r.v v1, v0 +; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: li a2, 0 +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsext.vf2 v0, v16 +; RV64-NEXT: vsll.vi v16, v0, 3 +; RV64-NEXT: addi a3, a1, -16 +; RV64-NEXT: vsext.vf2 v0, v8 ; RV64-NEXT: bltu a1, a3, .LBB93_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB93_2: +; RV64-NEXT: vsll.vi v8, v0, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64-NEXT: vslidedown.vi v0, v1, 2 -; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV64-NEXT: vslidedown.vi v16, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2445,11 +2379,8 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB93_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf2 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, double* %base, <32 x i32> %idxs @@ -2460,69 +2391,71 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV32-NEXT: vslidedown.vi v24, v8, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vsext.vf2 v16, v24 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB94_2 ; RV32-NEXT: 
# %bb.1: -; RV32-NEXT: mv a2, a3 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB94_2: -; RV32-NEXT: vsext.vf2 v24, v8 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v4, v8, 0 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV32-NEXT: vslidedown.vi v0, v1, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v4, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB94_4 +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: addi a3, a1, -16 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: bltu a1, a3, .LBB94_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a1, 16 +; RV32-NEXT: mv a2, a3 ; RV32-NEXT: .LBB94_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v1, v0 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsext.vf2 v0, v8 ; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vsext.vf2 v16, v24 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: bltu a1, a3, .LBB94_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB94_2: -; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v0, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64-NEXT: vslidedown.vi v0, v1, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vl1r.v v24, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 ; RV64-NEXT: bltu a1, a2, .LBB94_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB94_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs @@ -2533,69 +2466,71 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: li a2, 0 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV32-NEXT: vslidedown.vi v24, v8, 16 -; RV32-NEXT: 
vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vzext.vf2 v16, v24 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB95_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a3 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB95_2: -; RV32-NEXT: vzext.vf2 v24, v8 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v4, v8, 0 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV32-NEXT: vslidedown.vi v0, v1, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v4, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB95_4 +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: addi a3, a1, -16 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: bltu a1, a3, .LBB95_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a1, 16 +; RV32-NEXT: mv a2, a3 ; RV32-NEXT: .LBB95_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v1, v0 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 0 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vzext.vf2 v24, v16 +; RV64-NEXT: vzext.vf2 v0, v8 ; RV64-NEXT: addi a3, a1, -16 -; RV64-NEXT: vzext.vf2 v16, v24 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: bltu a1, a3, .LBB95_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB95_2: -; RV64-NEXT: vzext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v0, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64-NEXT: vslidedown.vi v0, v1, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vl1r.v v24, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 ; RV64-NEXT: bltu a1, a2, .LBB95_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB95_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs @@ -2606,48 +2541,51 @@ define <32 x double> @vpgather_baseidx_v32f64(double* 
%base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vmv1r.v v24, v0 +; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 0 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, tu, mu +; RV32-NEXT: vslideup.vi v16, v24, 16 +; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; RV32-NEXT: addi a3, a1, -16 +; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: bltu a1, a3, .LBB96_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 ; RV32-NEXT: .LBB96_2: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v28, v16, 0 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV32-NEXT: vslidedown.vi v0, v24, 2 +; RV32-NEXT: vslidedown.vi v0, v1, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: li a2, 16 ; RV32-NEXT: bltu a1, a2, .LBB96_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB96_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v28, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: vluxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: li a2, 0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: addi a3, a1, -16 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: bltu a1, a3, .LBB96_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 ; RV64-NEXT: .LBB96_2: +; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v24, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 @@ -2655,8 +2593,6 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB96_4: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -775,13 +775,11 @@ define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf8 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf4 v14, v12 +; RV32-NEXT: 
vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i64: @@ -801,13 +799,11 @@ define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf8 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf4 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: @@ -850,13 +846,11 @@ define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf2 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i64: @@ -876,13 +870,11 @@ define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf2 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64: @@ -924,13 +916,10 @@ define void @vpscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsll.vi v12, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8i64: @@ -950,13 +939,10 @@ define void @vpscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; 
RV32-NEXT: vzext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsll.vi v12, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8i64: @@ -976,12 +962,11 @@ define void @vpscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsll.vi v12, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsll.vi v12, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_v8i64: @@ -1496,13 +1481,11 @@ define void @vpscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf8 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf4 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f64: @@ -1522,13 +1505,11 @@ define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf8 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf4 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: @@ -1571,13 +1552,11 @@ define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsext.vf2 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f64: @@ -1597,13 
+1576,11 @@ define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vzext.vf2 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64: @@ -1645,13 +1622,10 @@ define void @vpscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsll.vi v12, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8f64: @@ -1671,13 +1645,10 @@ define void @vpscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vzext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV32-NEXT: vsll.vi v12, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8f64: @@ -1697,12 +1668,11 @@ define void @vpscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vsll.vi v12, v12, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsll.vi v12, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_v8f64: @@ -1805,26 +1775,26 @@ ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: bltu a2, a3, .LBB80_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB80_2: -; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; RV32-NEXT: addi a1, a2, -16 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: bltu a2, a1, .LBB80_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a3, 16 +; RV32-NEXT: .LBB80_2: +; RV32-NEXT: 
li a1, 0 +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; RV32-NEXT: addi a3, a2, -16 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: bltu a2, a1, .LBB80_4 +; RV32-NEXT: bltu a2, a3, .LBB80_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a3, a1 +; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB80_4: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu ; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret ; @@ -1833,57 +1803,52 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV64-NEXT: vle32.v v24, (a1) ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 16 -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB80_2 -; RV64-NEXT: # %bb.1: +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsext.vf2 v8, v16 +; RV64-NEXT: vsext.vf2 v16, v24 ; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: bltu a2, a3, .LBB80_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB80_2: -; RV64-NEXT: li a1, 0 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8re8.v v0, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsext.vf2 v24, v0 -; RV64-NEXT: vsll.vi v24, v24, 3 -; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; RV64-NEXT: addi a3, a2, -16 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: vsll.vi v16, v8, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: bltu a2, a3, .LBB80_4 +; RV64-NEXT: bltu a2, a1, .LBB80_4 ; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a1, a3 +; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB80_4: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf2 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; 
RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1895,62 +1860,30 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsext.vf2 v16, v24 -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: bltu a2, a3, .LBB81_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 +; RV32-NEXT: vsll.vi v24, v24, 3 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: bltu a2, a1, .LBB81_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a3, 16 ; RV32-NEXT: .LBB81_2: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: vsext.vf2 v24, v8 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: bltu a2, a1, .LBB81_4 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; RV32-NEXT: addi a3, a2, -16 +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: bltu a2, a3, .LBB81_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a3, a1 +; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB81_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64: @@ -1958,65 +1891,53 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 24 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV64-NEXT: vle32.v v24, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; 
RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV64-NEXT: vslidedown.vi v8, v24, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsext.vf2 v16, v24 +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV64-NEXT: vslidedown.vi v24, v24, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsext.vf2 v8, v24 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB81_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB81_2: ; RV64-NEXT: li a3, 0 -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsext.vf2 v16, v24 -; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsll.vi v16, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: addi a1, a2, -16 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: bltu a2, a1, .LBB81_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB81_4: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 24 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2029,62 +1950,30 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vzext.vf2 v16, v24 -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: bltu a2, a3, .LBB82_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 +; RV32-NEXT: vsll.vi v24, v24, 3 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: bltu a2, a1, .LBB82_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a3, 16 ; RV32-NEXT: .LBB82_2: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: vzext.vf2 v24, v8 
-; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: bltu a2, a1, .LBB82_4 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; RV32-NEXT: addi a3, a2, -16 +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: bltu a2, a3, .LBB82_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a3, a1 +; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB82_4: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64: @@ -2092,65 +1981,53 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 24 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV64-NEXT: vle32.v v24, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV64-NEXT: vslidedown.vi v8, v24, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: li a3, 16 +; RV64-NEXT: vzext.vf2 v16, v24 +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu +; RV64-NEXT: vslidedown.vi v24, v24, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vzext.vf2 v8, v24 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB82_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB82_2: ; RV64-NEXT: li a3, 0 -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vzext.vf2 v16, v24 -; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsll.vi v16, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: addi a1, a2, -16 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; 
RV64-NEXT: bltu a2, a1, .LBB82_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB82_4: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 24 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1034,13 +1034,11 @@ define @mgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf8 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vsll.vi v8, v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1061,13 +1059,11 @@ define @mgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf8 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf4 v12, v8 +; RV32-NEXT: vsll.vi v8, v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1112,13 +1108,11 @@ define @mgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf4 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v12, v8 +; RV32-NEXT: vsll.vi v8, v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1139,13 +1133,11 @@ define @mgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v12, v8 +; RV32-NEXT: vsll.vi v8, 
v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1189,13 +1181,10 @@ define @mgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf2 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v8, v8, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1216,13 +1205,10 @@ define @mgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf2 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v8, v8, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1243,12 +1229,11 @@ define @mgather_baseidx_nxv8i64(i64* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsll.vi v8, v24, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1965,13 +1950,11 @@ define @mgather_baseidx_sext_nxv8i8_nxv8f64(double* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf8 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vsll.vi v8, v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1992,13 +1975,11 @@ define @mgather_baseidx_zext_nxv8i8_nxv8f64(double* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf8 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf4 v12, v8 +; RV32-NEXT: vsll.vi v8, v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -2043,13 +2024,11 @@ define @mgather_baseidx_sext_nxv8i16_nxv8f64(double* %base, %idxs, %m, %passthru) { ; RV32-LABEL: 
mgather_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf4 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v12, v8 +; RV32-NEXT: vsll.vi v8, v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -2070,13 +2049,11 @@ define @mgather_baseidx_zext_nxv8i16_nxv8f64(double* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v12, v8 +; RV32-NEXT: vsll.vi v8, v12, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -2120,13 +2097,10 @@ define @mgather_baseidx_sext_nxv8i32_nxv8f64(double* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf2 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v8, v8, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -2147,13 +2121,10 @@ define @mgather_baseidx_zext_nxv8i32_nxv8f64(double* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf2 v24, v8 -; RV32-NEXT: vsll.vi v8, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v8, v8, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -2174,12 +2145,11 @@ define @mgather_baseidx_nxv8f64(double* %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; RV32-NEXT: vnsrl.wi v24, v8, 0 +; RV32-NEXT: vsll.vi v8, v24, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -823,13 +823,11 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8i64( %val, i64* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf8 
v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64: @@ -848,13 +846,11 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8i64( %val, i64* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: @@ -895,13 +891,11 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8i64( %val, i64* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: @@ -920,13 +914,11 @@ define void @mscatter_baseidx_zext_nxv8i16_nxv8i64( %val, i64* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: @@ -966,13 +958,10 @@ define void @mscatter_baseidx_sext_nxv8i32_nxv8i64( %val, i64* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: @@ -991,13 +980,10 @@ define void @mscatter_baseidx_zext_nxv8i32_nxv8i64( %val, i64* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, 
ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: @@ -1016,12 +1002,11 @@ define void @mscatter_baseidx_nxv8i64( %val, i64* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsll.vi v16, v24, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv8i64: @@ -1605,13 +1590,11 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8f64( %val, double* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64: @@ -1630,13 +1613,11 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8f64( %val, double* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: @@ -1677,13 +1658,11 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8f64( %val, double* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: @@ -1702,13 +1681,11 @@ define void @mscatter_baseidx_zext_nxv8i16_nxv8f64( %val, double* %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 ; RV32-NEXT: 
vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
@@ -1748,13 +1725,10 @@ define void @mscatter_baseidx_sext_nxv8i32_nxv8f64( %val, double* %base, %idxs, %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf2 v24, v16
-; RV32-NEXT: vsll.vi v16, v24, 3
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v24, v16, 0
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
@@ -1773,13 +1747,10 @@ define void @mscatter_baseidx_zext_nxv8i32_nxv8f64( %val, double* %base, %idxs, %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf2 v24, v16
-; RV32-NEXT: vsll.vi v16, v24, 3
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v24, v16, 0
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
@@ -1798,12 +1769,11 @@ define void @mscatter_baseidx_nxv8f64( %val, double* %base, %idxs, %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV32-NEXT: vsll.vi v16, v16, 3
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v24, v16, 0
+; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
@@ -1895,13 +1865,13 @@
; RV64-NEXT: vsext.vf8 v24, v2
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT: vsext.vf8 v8, v3
+; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf8 v8, v3
-; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, double* %base, %idxs
@@ -1935,13 +1905,13 @@
; RV64-NEXT: vsext.vf4 v24, v4
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT: vsext.vf4 v8, v6
+; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf4 v8, v6
-; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, double* %base, %idxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -1052,12 +1052,10 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf8 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsext.vf4 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1078,12 +1076,10 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf8 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vzext.vf4 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1127,12 +1123,10 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf4 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsext.vf2 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1153,12 +1147,10 @@ define @vpgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf4 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vzext.vf2 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1201,12 +1193,9 @@ define @vpgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsll.vi v16, v8, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1227,12 +1216,9 @@ define @vpgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsll.vi v16, v8, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1253,11 +1239,10 @@ define @vpgather_baseidx_nxv8i64(i64* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsll.vi v8, v8, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsll.vi v16, v16, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1847,12 +1832,10 @@ define @vpgather_baseidx_sext_nxv6i8_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf8 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsext.vf4 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1873,12 +1856,10 @@ define @vpgather_baseidx_zext_nxv6i8_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf8 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vzext.vf4 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1922,12 +1903,10 @@ define @vpgather_baseidx_sext_nxv6i16_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf4 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsext.vf2 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1948,12 +1927,10 @@ define @vpgather_baseidx_zext_nxv6i16_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf4 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vzext.vf2 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -1996,12 +1973,9 @@ define @vpgather_baseidx_sext_nxv6i32_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsll.vi v16, v8, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -2022,12 +1996,9 @@ define @vpgather_baseidx_zext_nxv6i32_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsll.vi v16, v8, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -2048,11 +2019,10 @@ define @vpgather_baseidx_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsll.vi v8, v8, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsll.vi v16, v16, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -2113,12 +2083,10 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf8 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsext.vf4 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -2139,12 +2107,10 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vzext.vf8 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vzext.vf4 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
;
@@ -2188,12 +2154,10 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8f64(double* %base, %idxs, %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV32-NEXT: vsext.vf4 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 3
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; RV32-NEXT: vnsrl.wi v16, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT: vsext.vf2 v12, v8
+; RV32-NEXT: vsll.vi v16, v12, 3
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; @@ -2214,12 +2178,10 @@ define @vpgather_baseidx_zext_nxv8i16_nxv8f64(double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v12, v8 +; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; @@ -2262,12 +2224,9 @@ define @vpgather_baseidx_sext_nxv8i32_nxv8f64(double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; @@ -2288,12 +2247,9 @@ define @vpgather_baseidx_zext_nxv8i32_nxv8f64(double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; @@ -2314,11 +2270,10 @@ define @vpgather_baseidx_nxv8f64(double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu ; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; @@ -2394,6 +2349,9 @@ ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: li a3, 0 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; RV32-NEXT: vsext.vf2 v16, v8 +; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: srli a5, a2, 3 ; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu @@ -2403,9 +2361,6 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a4 ; RV32-NEXT: .LBB103_2: -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, mu -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: bltu a1, a2, .LBB103_4 @@ -2421,6 +2376,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: li a3, 0 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: srli a5, a2, 3 ; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, 
mu @@ -2431,17 +2390,13 @@ ; RV64-NEXT: mv a3, a4 ; RV64-NEXT: .LBB103_2: ; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu -; RV64-NEXT: vsext.vf4 v16, v10 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: bltu a1, a2, .LBB103_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB103_4: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2454,35 +2409,29 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v12, v0 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; RV32-NEXT: vsext.vf2 v16, v8 +; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bltu a1, a2, .LBB104_2 +; RV32-NEXT: srli a5, a2, 3 +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu +; RV32-NEXT: sub a4, a1, a2 +; RV32-NEXT: vslidedown.vx v0, v0, a5 +; RV32-NEXT: bltu a1, a4, .LBB104_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a3, a2 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: .LBB104_2: -; RV32-NEXT: li a4, 0 -; RV32-NEXT: vsext.vf4 v24, v10 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: srli a3, a2, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu -; RV32-NEXT: sub a2, a1, a2 -; RV32-NEXT: vslidedown.vx v0, v0, a3 +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: bltu a1, a2, .LBB104_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a4, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB104_4: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: @@ -2491,6 +2440,8 @@ ; RV64-NEXT: li a3, 0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: srli a5, a2, 3 ; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu @@ -2501,16 +2452,13 @@ ; RV64-NEXT: mv a3, a4 ; RV64-NEXT: .LBB104_2: ; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: bltu a1, a2, .LBB104_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB104_4: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2524,35 +2472,29 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %base, %idxs, %m, 
i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v12, v0 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; RV32-NEXT: vzext.vf2 v16, v8 +; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vzext.vf4 v16, v8 -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bltu a1, a2, .LBB105_2 +; RV32-NEXT: srli a5, a2, 3 +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu +; RV32-NEXT: sub a4, a1, a2 +; RV32-NEXT: vslidedown.vx v0, v0, a5 +; RV32-NEXT: bltu a1, a4, .LBB105_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a3, a2 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: .LBB105_2: -; RV32-NEXT: li a4, 0 -; RV32-NEXT: vzext.vf4 v24, v10 -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: srli a3, a2, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu -; RV32-NEXT: sub a2, a1, a2 -; RV32-NEXT: vslidedown.vx v0, v0, a3 +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: bltu a1, a2, .LBB105_4 ; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a4, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB105_4: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: @@ -2561,6 +2503,8 @@ ; RV64-NEXT: li a3, 0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu ; RV64-NEXT: vzext.vf4 v16, v10 +; RV64-NEXT: vzext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: srli a5, a2, 3 ; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu @@ -2571,16 +2515,13 @@ ; RV64-NEXT: mv a3, a4 ; RV64-NEXT: .LBB105_2: ; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu -; RV64-NEXT: vzext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: bltu a1, a2, .LBB105_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB105_4: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -811,13 +811,11 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64( %val, i64* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, 
a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64: @@ -837,13 +835,11 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64( %val, i64* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: @@ -886,13 +882,11 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64( %val, i64* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64: @@ -912,13 +906,11 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64( %val, i64* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: @@ -960,13 +952,10 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64( %val, i64* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64: @@ -986,13 +975,10 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64( %val, i64* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: 
vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64: @@ -1012,12 +998,11 @@ define void @vpscatter_baseidx_nxv8i64( %val, i64* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu ; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i64: @@ -1586,13 +1571,11 @@ define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: @@ -1612,13 +1595,11 @@ define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: @@ -1661,13 +1642,11 @@ define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: @@ -1687,13 +1666,11 @@ define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; 
RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: @@ -1735,13 +1712,10 @@ define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: @@ -1761,13 +1735,10 @@ define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: @@ -1787,12 +1758,11 @@ define void @vpscatter_baseidx_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu ; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv6f64: @@ -1851,13 +1821,11 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64: @@ -1877,13 +1845,11 @@ define void 
@vpscatter_baseidx_zext_nxv8i8_nxv8f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf8 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64: @@ -1926,13 +1892,11 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64: @@ -1952,13 +1916,11 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf4 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vzext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64: @@ -2000,13 +1962,10 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64: @@ -2026,13 +1985,10 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vzext.vf2 v24, v16 -; RV32-NEXT: vsll.vi v16, v24, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e32, 
m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64: @@ -2052,12 +2008,11 @@ define void @vpscatter_baseidx_nxv8f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v16, v16, 3 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu ; RV32-NEXT: vnsrl.wi v24, v16, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8f64: @@ -2146,16 +2101,16 @@ ; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vl4re16.v v4, (a1) +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; RV32-NEXT: vsext.vf2 v24, v4 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 ; RV32-NEXT: bltu a2, a1, .LBB96_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: .LBB96_2: ; RV32-NEXT: li a4, 0 -; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, mu -; RV32-NEXT: vsext.vf2 v24, v4 -; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: srli a3, a1, 3 @@ -2172,19 +2127,28 @@ ; ; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: vl4re16.v v4, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v16, v4 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: mv a3, a2 +; RV64-NEXT: vsext.vf4 v24, v6 ; RV64-NEXT: bltu a2, a1, .LBB96_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB96_2: ; RV64-NEXT: li a4, 0 -; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu -; RV64-NEXT: vsext.vf4 v24, v4 ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: srli a3, a1, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a1, a2, a1 @@ -2193,11 +2157,14 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a4, a1 ; RV64-NEXT: .LBB96_4: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vsext.vf4 v8, v6 -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, double* %base, %idxs call void @llvm.vp.scatter.nxv16f64.nxv16p0f64( %val, %ptrs, %m, i32 %evl) @@ -2207,35 +2174,18 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; 
RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: sub sp, sp, a3 -; RV32-NEXT: vl4re16.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV32-NEXT: vl4re16.v v4, (a1) +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; RV32-NEXT: vsext.vf2 v24, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: vsext.vf4 v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 ; RV32-NEXT: bltu a2, a1, .LBB97_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: .LBB97_2: ; RV32-NEXT: li a4, 0 -; RV32-NEXT: vsext.vf4 v16, v26 -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: srli a3, a1, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu @@ -2245,21 +2195,8 @@ ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a4, a1 ; RV32-NEXT: .LBB97_4: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64: @@ -2267,31 +2204,25 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: vl4re16.v v24, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl4re16.v v4, (a1) ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v16, v4 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vsext.vf4 v8, v24 ; RV64-NEXT: mv a3, a2 +; RV64-NEXT: vsext.vf4 v24, v6 ; RV64-NEXT: bltu a2, a1, .LBB97_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB97_2: ; RV64-NEXT: li a4, 0 -; RV64-NEXT: vsext.vf4 v16, v26 -; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl8re8.v v24, (a3) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: srli a3, a1, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a1, a2, a1 @@ -2300,17 +2231,12 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a4, a1 ; RV64-NEXT: .LBB97_4: -; RV64-NEXT: vsetvli a1, zero, 
e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2323,35 +2249,18 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: sub sp, sp, a3 -; RV32-NEXT: vl4re16.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV32-NEXT: vl4re16.v v4, (a1) +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; RV32-NEXT: vzext.vf2 v24, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: vzext.vf4 v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 ; RV32-NEXT: bltu a2, a1, .LBB98_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: .LBB98_2: ; RV32-NEXT: li a4, 0 -; RV32-NEXT: vzext.vf4 v16, v26 -; RV32-NEXT: vsll.vi v8, v8, 3 -; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v24, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: srli a3, a1, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu @@ -2361,21 +2270,8 @@ ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a4, a1 ; RV32-NEXT: .LBB98_4: -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV32-NEXT: vsll.vi v8, v16, 3 -; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu -; RV32-NEXT: vnsrl.wi v16, v8, 0 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64: @@ -2383,31 +2279,25 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: vl4re16.v v24, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl4re16.v v4, (a1) ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: 
vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vzext.vf4 v16, v4 +; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vzext.vf4 v8, v24 ; RV64-NEXT: mv a3, a2 +; RV64-NEXT: vzext.vf4 v24, v6 ; RV64-NEXT: bltu a2, a1, .LBB98_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB98_2: ; RV64-NEXT: li a4, 0 -; RV64-NEXT: vzext.vf4 v16, v26 -; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl8re8.v v24, (a3) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: srli a3, a1, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a1, a2, a1 @@ -2416,17 +2306,12 @@ ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a4, a1 ; RV64-NEXT: .LBB98_4: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret