diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -420,6 +420,8 @@
   static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                               const RISCVSubtarget &Subtarget);
 
+  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
+
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -462,6 +464,7 @@
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                                SelectionDAG &DAG) const;
+  SDValue lowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -468,6 +468,9 @@
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
 
+      setOperationAction(ISD::MGATHER, VT, Custom);
+
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -507,6 +510,9 @@
       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
 
+      setOperationAction(ISD::MGATHER, VT, Custom);
+
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -677,8 +683,10 @@
   if (Subtarget.hasStdExtZbp()) {
     setTargetDAGCombine(ISD::OR);
   }
-  if (Subtarget.hasStdExtV())
+  if (Subtarget.hasStdExtV()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
+    setTargetDAGCombine(ISD::MGATHER);
+  }
 }
 
 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -1551,9 +1559,8 @@
   // better than going through the stack, as the default expansion does.
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
-  assert(VT.isFixedLengthVector() && "Unexpected CONCAT_VECTORS lowering");
   unsigned NumOpElts =
-      Op.getOperand(0).getSimpleValueType().getVectorNumElements();
+      Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
   SDValue Vec = DAG.getUNDEF(VT);
   for (const auto &OpIdx : enumerate(Op->ops()))
     Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
@@ -1629,6 +1636,8 @@
     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
   case ISD::FCOPYSIGN:
     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
+  case ISD::MGATHER:
+    return lowerMGATHER(Op, DAG);
   }
 }
 
@@ -3296,6 +3305,46 @@
   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
 }
 
+// Custom lower MGATHER to a legalized form for RVV. It will then be matched to
+// a RVV indexed load. The RVV indexed load/store instructions only support the
+// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
+// truncated to XLEN and are treated as byte offsets. Any signed or scaled
+// indexing is promoted to the XLEN value type and scaled accordingly.
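+// An all-ones mask is lowered to the unmasked riscv_vloxei intrinsic; any
+// other mask uses riscv_vloxei_mask, with the gather's passthru operand
+// supplying the value of the inactive lanes.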
+SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  SDValue Index = N->getIndex();
+  SDValue Mask = N->getMask();
+  SDValue PassThru = N->getPassThru();
+
+  MVT XLenVT = Subtarget.getXLenVT();
+  assert(N->getBasePtr().getSimpleValueType() == XLenVT &&
+         "Unexpected pointer type");
+  // Targets have to explicitly opt-in for extending vector loads.
+  assert(N->getExtensionType() == ISD::NON_EXTLOAD &&
+         "Unexpected extending MGATHER");
+
+  SDValue VL = getDefaultVLOps(VT, VT, DL, DAG, Subtarget).second;
+  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
+  // the selection of the masked intrinsics doesn't do this for us.
+  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode())) {
+    SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vloxei, DL, XLenVT);
+    SDValue Ops[] = {N->getChain(), IntID, N->getBasePtr(), Index, VL};
+    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
+                                   DAG.getVTList(VT, MVT::Other), Ops,
+                                   N->getMemoryVT(), N->getMemOperand());
+  }
+
+  SDValue IntID =
+      DAG.getTargetConstant(Intrinsic::riscv_vloxei_mask, DL, XLenVT);
+  SDValue Ops[] = {N->getChain(), IntID, PassThru, N->getBasePtr(),
+                   Index, Mask, VL};
+  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
+                                 DAG.getVTList(VT, MVT::Other), Ops,
+                                 N->getMemoryVT(), N->getMemOperand());
+}
+
 // Returns the opcode of the target-specific SDNode that implements the 32-bit
 // form of the given Opcode.
 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
@@ -4232,6 +4281,49 @@
     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
   }
+  case ISD::MGATHER: {
+    if (!DCI.isBeforeLegalize())
+      break;
+    MaskedGatherSDNode *MGN = cast<MaskedGatherSDNode>(N);
+    SDValue Index = MGN->getIndex();
+    EVT IndexVT = Index.getValueType();
+    MVT XLenVT = Subtarget.getXLenVT();
+    // RISCV indexed loads only support the "unsigned unscaled" addressing
+    // mode, so anything else must be manually legalized.
+    bool NeedsIdxLegalization =
+        MGN->isIndexScaled() ||
+        (MGN->isIndexSigned() && IndexVT.getVectorElementType().bitsLT(XLenVT));
+    if (!NeedsIdxLegalization)
+      break;
+
+    SDLoc DL(N);
+
+    // Any index legalization should first promote to XLenVT, so we don't lose
+    // bits when scaling. This may create an illegal index type so we let
+    // LLVM's legalization take care of the splitting.
+    if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
+      IndexVT = IndexVT.changeVectorElementType(XLenVT);
+      Index = DAG.getNode(MGN->isIndexSigned() ? ISD::SIGN_EXTEND
+                                               : ISD::ZERO_EXTEND,
+                          DL, IndexVT, Index);
+    }
+
+    unsigned Scale = N->getConstantOperandVal(5);
+    if (MGN->isIndexScaled() && Scale != 1) {
+      // Manually scale the indices by the element size.
+      // TODO: Sanitize the scale operand here?
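+      // The scale is always a power of two here, so the multiply can be
+      // applied as a left shift by Log2(Scale).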
+ assert(isPowerOf2_32(Scale) && "Expecting power-of-two types"); + SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT); + Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale); + } + + ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED; + return DAG.getMaskedGather( + N->getVTList(), MGN->getMemoryVT(), DL, + {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), MGN->getBasePtr(), + Index, MGN->getScale()}, + MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); + } } return SDValue(); @@ -6541,6 +6633,10 @@ return Result; } +bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { + return false; +} + bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -0,0 +1,2190 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 + +declare @llvm.masked.gather.nxv1i8.nxv1p0i8(, i32, , ) + +define @mgather_nxv1i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf8,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf8,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i8.nxv1p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i8.nxv2p0i8(, i32, , ) + +define @mgather_nxv2i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +define @mgather_nxv2i8_sextload_nxv2i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vsext.vf2 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vsext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i8_zextload_nxv2i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vzext.vf2 v8, v9 +; 
RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vzext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = zext %v to + ret %ev +} + +define @mgather_nxv2i8_sextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vsext.vf4 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vsext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i8_zextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vzext.vf4 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vzext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = zext %v to + ret %ev +} + +define @mgather_nxv2i8_sextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsext.vf8 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsext.vf8 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i8_zextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vzext.vf8 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vzext.vf8 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = zext %v to + ret %ev +} + +declare @llvm.masked.gather.nxv4i8.nxv4p0i8(, i32, , ) + +define @mgather_nxv4i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf2,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i8: +; RV64: # %bb.0: +; RV64-NEXT: 
vsetvli a0, zero, e8,mf2,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i8.nxv4p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i8( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i8.nxv4p0i8( %ptrs, i32 1, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i8.nxv8p0i8(, i32, , ) + +define @mgather_nxv8i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,m1,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,m1,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i8.nxv8p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8(i8* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsetvli a1, zero, e8,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (a0), v28, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli a1, zero, e8,m1,tu,mu +; RV64-NEXT: vloxei64.v v9, (a0), v16, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i8, i8* %base, %idxs + %v = call @llvm.masked.gather.nxv8i8.nxv8p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1i16.nxv1p0i16(, i32, , ) + +define @mgather_nxv1i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i16.nxv1p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i16.nxv2p0i16(, i32, , ) + +define @mgather_nxv2i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_nxv2i16_sextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, 
zero, e32,m1,ta,mu +; RV32-NEXT: vsext.vf2 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vsext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i16_zextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vzext.vf2 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vzext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = zext %v to + ret %ev +} + +define @mgather_nxv2i16_sextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsext.vf4 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i16_zextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vzext.vf4 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vzext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = zext %v to + ret %ev +} + +declare @llvm.masked.gather.nxv4i16.nxv4p0i16(, i32, , ) + +define @mgather_nxv4i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i16.nxv4p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i16( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + 
%mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i16.nxv4p0i16( %ptrs, i32 2, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i16.nxv8p0i16(, i32, , ) + +define @mgather_nxv8i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i16, i16* %base, %idxs + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i16, i16* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i16, i16* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i16: +; 
RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i16, i16* %base, %idxs + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1i32.nxv1p0i32(, i32, , ) + +define @mgather_nxv1i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i32.nxv1p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i32.nxv2p0i32(, i32, , ) + +define @mgather_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i32.nxv2p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_nxv2i32_sextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i32_sextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsext.vf2 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i32_sextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i32.nxv2p0i32( %ptrs, i32 4, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i32_zextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i32_zextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vzext.vf2 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i32_zextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vzext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i32.nxv2p0i32( %ptrs, i32 4, %m, %passthru) + %ev = zext %v to + ret %ev +} + +declare @llvm.masked.gather.nxv4i32.nxv4p0i32(, i32, , ) + +define @mgather_nxv4i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; 
RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i32( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %ptrs, i32 4, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i32.nxv8p0i32(, i32, , ) + +define @mgather_nxv8i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv4r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v8 +; RV32-NEXT: vsll.vi 
v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i16_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i16_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i16_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v8, 2 +; RV32-NEXT: vsetvli a1, zero, 
e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1i64.nxv1p0i64(, i32, , ) + +define @mgather_nxv1i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i64.nxv1p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i64.nxv2p0i64(, i32, , ) + +define @mgather_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i64.nxv2p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4i64.nxv4p0i64(, i32, , ) + +define @mgather_nxv4i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i64.nxv4p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i64( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i64.nxv4p0i64( %ptrs, i32 8, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i64.nxv8p0i64(, i32, , ) + +define @mgather_nxv8i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (zero), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8i64(i64* %base, %idxs, %m, 
%passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsext.vf8 v24, v8 +; RV32-NEXT: vsll.vi v8, v24, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vzext.vf8 v24, v8 +; RV32-NEXT: vsll.vi v8, v24, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: 
mgather_baseidx_sext_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsext.vf4 v24, v8 +; RV32-NEXT: vsll.vi v8, v24, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vzext.vf4 v24, v8 +; RV32-NEXT: vsll.vi v8, v24, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v8, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsext.vf2 v24, v8 +; RV32-NEXT: vsll.vi v8, v24, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: 
mgather_baseidx_zext_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vzext.vf2 v24, v8 +; RV32-NEXT: vsll.vi v8, v24, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsll.vi v8, v8, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv16i64.nxv16p0f64(, i32, , ) + +declare @llvm.experimental.vector.insert.nxv8i64.nxv16i64(, , i64 %idx) +declare @llvm.experimental.vector.insert.nxv8p0i64.nxv16p0i64(, , i64 %idx) + +define void @mgather_nxv16i64( %ptrs0, %ptrs1, %m, %passthru0, %passthru1, * %out) { +; RV32-LABEL: mgather_nxv16i64: +; RV32: # %bb.0: +; RV32-NEXT: vl8re64.v v24, (a0) +; RV32-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (zero), v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: vsetvli a2, zero, e8,mf4,ta,mu +; RV32-NEXT: vslidedown.vx v0, v0, a0 +; RV32-NEXT: vsetvli a2, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v24, (zero), v12, v0.t +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: vs8r.v v24, (a0) +; RV32-NEXT: vs8r.v v16, (a1) +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv16i64: +; RV64: # %bb.0: +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: sub sp, sp, a3 +; RV64-NEXT: vl8re64.v v24, (a0) +; RV64-NEXT: vs8r.v v16, (sp) # Unknown-size Folded Spill +; RV64-NEXT: vmv8r.v v16, v8 +; RV64-NEXT: vl8re64.v v8, (a1) +; RV64-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v24, (zero), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 3 +; RV64-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; RV64-NEXT: vslidedown.vx v0, v0, a0 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vl8re8.v v16, (sp) # Unknown-size Folded Reload +; RV64-NEXT: vloxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: vs8r.v v8, (a0) +; RV64-NEXT: vs8r.v v24, (a2) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ret + %p0 = call @llvm.experimental.vector.insert.nxv8p0i64.nxv16p0i64( undef, %ptrs0, i64 0) + %p1 = call @llvm.experimental.vector.insert.nxv8p0i64.nxv16p0i64( %p0, %ptrs1, i64 8) + + %pt0 = 
call @llvm.experimental.vector.insert.nxv8i64.nxv16i64( undef, %passthru0, i64 0) + %pt1 = call @llvm.experimental.vector.insert.nxv8i64.nxv16i64( %pt0, %passthru1, i64 8) + + %v = call @llvm.masked.gather.nxv16i64.nxv16p0f64( %p1, i32 8, %m, %pt1) + store %v, * %out + ret void +} + + +declare @llvm.masked.gather.nxv1f16.nxv1p0f16(, i32, , ) + +define @mgather_nxv1f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1f16.nxv1p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2f16.nxv2p0f16(, i32, , ) + +define @mgather_nxv2f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2f16.nxv2p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4f16.nxv4p0f16(, i32, , ) + +define @mgather_nxv4f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4f16.nxv4p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4f16( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4f16.nxv4p0f16( %ptrs, i32 2, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8f16.nxv8p0f16(, i32, , ) + +define @mgather_nxv8f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8f16.nxv8p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8f16(half* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, 
v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 1
+; RV64-NEXT:    vsetvli a1, zero, e16,m2,tu,mu
+; RV64-NEXT:    vloxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
+  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @mgather_baseidx_sext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf4 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 1
+; RV32-NEXT:    vsetvli a1, zero, e16,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v10, (a0), v28, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 1
+; RV64-NEXT:    vsetvli a1, zero, e16,m2,tu,mu
+; RV64-NEXT:    vloxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
+  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
+  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @mgather_baseidx_zext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vzext.vf4 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 1
+; RV32-NEXT:    vsetvli a1, zero, e16,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v10, (a0), v28, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vzext.vf8 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 1
+; RV64-NEXT:    vsetvli a1, zero, e16,m2,tu,mu
+; RV64-NEXT:    vloxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
+  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
+  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @mgather_baseidx_nxv8f16(half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf2 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 1
+; RV32-NEXT:    vsetvli a1, zero, e16,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v10, (a0), v28, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf4 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 1
+; RV64-NEXT:    vsetvli a1, zero, e16,m2,tu,mu
+; RV64-NEXT:    vloxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs
+  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
+  ret <vscale x 8 x half> %v
+}
+
+declare <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*>, i32, <vscale x 1 x i1>, <vscale x 1 x float>)
+
+define <vscale x 1 x float> @mgather_nxv1f32(<vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru) {
+; RV32-LABEL: mgather_nxv1f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32,mf2,tu,mu
+; RV32-NEXT:    vloxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv1f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32,mf2,tu,mu
+; RV64-NEXT:    vloxei64.v v9, (zero), v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
+  %v = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru)
+  ret <vscale x 1 x float> %v
+}
+
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+
+define <vscale x 2 x float> @mgather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru) {
+; RV32-LABEL: mgather_nxv2f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32,m1,tu,mu
+; RV32-NEXT:    vloxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv2f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32,m1,tu,mu
+; RV64-NEXT:    vloxei64.v v10, (zero), v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    ret
+  %v = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru)
+  ret <vscale x 2 x float> %v
+}
+
+declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+
+define <vscale x 4 x float> @mgather_nxv4f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x float> %passthru) {
+; RV32-LABEL: mgather_nxv4f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv4f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32,m2,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (zero), v8, v0.t
+; RV64-NEXT:    vmv2r.v v8, v12
+; RV64-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x float> %passthru)
+  ret <vscale x 4 x float> %v
+}
+
+define <vscale x 4 x float> @mgather_truemask_nxv4f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x float> %passthru) {
+; RV32-LABEL: mgather_truemask_nxv4f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_truemask_nxv4f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    ret
+  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue, <vscale x 4 x float> %passthru)
+  ret <vscale x 4 x float> %v
+}
+
+declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
+
+define <vscale x 8 x float> @mgather_nxv8f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (zero), v8, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (zero), v8, v0.t
+; RV64-NEXT:    vmv4r.v v8, v16
+; RV64-NEXT:    ret
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf4 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 2
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf4 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 2
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vzext.vf4 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 2
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vzext.vf8 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf2 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 2
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf4 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf2 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 2
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf4 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vzext.vf2 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 2
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vzext.vf4 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_nxv8f32(float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsll.vi v28, v8, 2
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf2 v16, v8
+; RV64-NEXT:    vsll.vi v16, v16, 2
+; RV64-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*>, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
+
+define <vscale x 1 x double> @mgather_nxv1f64(<vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru) {
+; RV32-LABEL: mgather_nxv1f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64,m1,tu,mu
+; RV32-NEXT:    vloxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv1f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64,m1,tu,mu
+; RV64-NEXT:    vloxei64.v v9, (zero), v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
+  %v = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru)
+  ret <vscale x 1 x double> %v
+}
+
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+define <vscale x 2 x double> @mgather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru) {
+; RV32-LABEL: mgather_nxv2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64,m2,tu,mu
+; RV64-NEXT:    vloxei64.v v10, (zero), v8, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %v = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru)
+  ret <vscale x 2 x double> %v
+}
+
+declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
+
+define <vscale x 4 x double> @mgather_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru) {
+; RV32-LABEL: mgather_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64,m4,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (zero), v8, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64,m4,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (zero), v8, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru)
+  ret <vscale x 4 x double> %v
+}
+
+define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x double> %passthru) {
+; RV32-LABEL: mgather_truemask_nxv4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT:    vloxei32.v v8, (zero), v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_truemask_nxv4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; RV64-NEXT:    vloxei64.v v8, (zero), v8
+; RV64-NEXT:    ret
+  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x double> %passthru)
+  ret <vscale x 4 x double> %v
+}
+
+declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*>, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
+
+define <vscale x 8 x double> @mgather_nxv8f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei32.v v16, (zero), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a0, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (zero), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf4 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vsext.vf8 v24, v8
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vzext.vf8 v24, v8
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vzext.vf8 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsext.vf2 v28, v8
+; RV32-NEXT:    vsll.vi v28, v28, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vsext.vf4 v24, v8
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vzext.vf4 v24, v8
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vzext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT:    vsll.vi v28, v8, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vsext.vf2 v24, v8
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vzext.vf2 v24, v8
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vzext.vf2 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8f64(double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT:    vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+
+define <vscale x 16 x i8> @mgather_baseidx_nxv16i8(i8* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv16i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
+; RV32-NEXT:    vsext.vf4 v16, v8
+; RV32-NEXT:    vsetvli a1, zero, e8,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v10, (a0), v16, v0.t
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv16i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsetvli a1, zero, e8,m1,tu,mu
+; RV64-NEXT:    vloxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    srli a1, a1, 3
+; RV64-NEXT:    vsetvli a2, zero, e8,mf4,ta,mu
+; RV64-NEXT:    vslidedown.vx v0, v0, a1
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v9
+; RV64-NEXT:    vsetvli a1, zero, e8,m1,tu,mu
+; RV64-NEXT:    vloxei64.v v11, (a0), v16, v0.t
+; RV64-NEXT:    vmv2r.v v8, v10
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 16 x i8> %idxs
+  %v = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*> %ptrs, i32 2, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru)
+  ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)
+
+define <vscale x 32 x i8> @mgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv32i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
+; RV32-NEXT:    vsext.vf4 v16, v8
+; RV32-NEXT:    vsetvli a1, zero, e8,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v12, (a0), v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli a2, zero, e8,mf2,ta,mu
+; RV32-NEXT:    vslidedown.vx v0, v0, a1
+; RV32-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
+; RV32-NEXT:    vsext.vf4 v16, v10
+; RV32-NEXT:    vsetvli a1, zero, e8,m2,tu,mu
+; RV32-NEXT:    vloxei32.v v14, (a0), v16, v0.t
+; RV32-NEXT:    vmv4r.v v8, v12
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mgather_baseidx_nxv32i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsetvli a1, zero, e8,m1,tu,mu
+; RV64-NEXT:    vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    srli a1, a1, 3
+; RV64-NEXT:    vsetvli a2, zero, e8,mf4,ta,mu
+; RV64-NEXT:    vslidedown.vx v25, v0, a1
+; RV64-NEXT:    vmv1r.v v26, v0
+; RV64-NEXT:    vsetvli a2, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v9
+; RV64-NEXT:    vsetvli a2, zero, e8,m1,tu,mu
+; RV64-NEXT:    vmv1r.v v0, v25
+; RV64-NEXT:    vloxei64.v v13, (a0), v16, v0.t
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    vsetvli a3, zero, e8,mf2,ta,mu
+; RV64-NEXT:    vslidedown.vx v26, v26, a2
+; RV64-NEXT:    vsetvli a2, zero, e8,mf4,ta,mu
+; RV64-NEXT:    vslidedown.vx v0, v26, a1
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v11
+; RV64-NEXT:    vsetvli a1, zero, e8,m1,tu,mu
+; RV64-NEXT:    vloxei64.v v15, (a0), v16, v0.t
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vsext.vf8 v16, v10
+; RV64-NEXT:    vsetvli a1, zero, e8,m1,tu,mu
+; RV64-NEXT:    vmv1r.v v0, v26
+; RV64-NEXT:    vloxei64.v v14, (a0), v16, v0.t
+; RV64-NEXT:    vmv4r.v v8, v12
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
+  %v = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, i32 2, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru)
+  ret <vscale x 32 x i8> %v
+}