diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -133,6 +133,7 @@ VECREDUCE_XOR, VECREDUCE_FADD, VECREDUCE_SEQ_FADD, + MGATHER = ISD::FIRST_TARGET_MEMORY_OPCODE, }; } // namespace RISCVISD @@ -300,6 +301,8 @@ Value *NewVal, Value *Mask, AtomicOrdering Ord) const override; + bool shouldRemoveExtendFromGSIndex(EVT VT) const override; + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -336,6 +339,7 @@ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerMGATHER(SDValue Op, SelectionDAG &DAG) const; bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -438,6 +438,8 @@ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + + setOperationAction(ISD::MGATHER, VT, Custom); } // Expand various CCs to best match the RVV ISA, which natively supports UNE @@ -471,6 +473,8 @@ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); + + setOperationAction(ISD::MGATHER, VT, Custom); }; if (Subtarget.hasStdExtZfh()) @@ -928,6 +932,8 @@ case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_SEQ_FADD: return lowerFPVECREDUCE(Op, DAG); + case ISD::MGATHER: + return lowerMGATHER(Op, DAG); } } @@ -1741,6 +1747,61 @@ DAG.getConstant(0, DL, Subtarget.getXLenVT())); } +// Custom lower MGATHER to a legalized form for RVV. It will then be matched to +// a RVV indexed load. The RVV indexed load/store instructions only support the +// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or +// truncated to XLEN and are treated as byte offsets. Any signed or scaled +// indexing is therefore legalized by extending the indices to the XLEN value +// type and scaling them manually. +SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const { + MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode()); + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + SDValue Index = N->getIndex(); + SDValue Mask = N->getMask(); + SDValue PassThru = N->getPassThru(); + MVT IndexVT = Index.getSimpleValueType(); + + MVT XLenVT = Subtarget.getXLenVT(); + assert(N->getBasePtr().getSimpleValueType() == XLenVT && + "Unexpected pointer type"); + // Targets have to explicitly opt-in for extending vector loads. + assert(N->getExtensionType() == ISD::NON_EXTLOAD && + "Unexpected extending MGATHER"); + + // RISCV indexed loads only support the "unsigned unscaled" addressing mode, + // so anything else must be manually legalized. + if (N->isIndexScaled() || + (N->isIndexSigned() && IndexVT.getVectorElementType().bitsLT(XLenVT))) { + // Extend the indices up to pointer size (XLEN). This is required for all + // signed indices (we need the sign bit at position XLEN-1), and for + // unsigned scaled indices it helps prevent overflow when scaling. + if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { + IndexVT = IndexVT.changeVectorElementType(XLenVT); + Index = + DAG.getNode(N->isIndexSigned() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + DL, IndexVT, Index); + } + // Manually scale the indices by the element size. + if (N->isIndexScaled() && N->getConstantOperandVal(5) != 1) { + bool IsRV32E64 = + !Subtarget.is64Bit() && IndexVT.getVectorElementType() == MVT::i64; + SDValue SplatScale = DAG.getConstant(Log2_32(N->getConstantOperandVal(5)), + DL, Subtarget.getXLenVT()); + if (!IsRV32E64) + SplatScale = DAG.getSplatVector(IndexVT, DL, SplatScale); + else + SplatScale = + DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IndexVT, SplatScale); + Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale); + } + } + + SDValue Ops[] = {N->getChain(), PassThru, N->getBasePtr(), Index, Mask}; + return DAG.getMemIntrinsicNode(RISCVISD::MGATHER, DL, + DAG.getVTList(VT, MVT::Other), Ops, + N->getMemoryVT(), N->getMemOperand()); +} + // Returns the opcode of the target-specific SDNode that implements the 32-bit // form of the given Opcode. static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { @@ -4309,6 +4370,7 @@ NODE_NAME_CASE(VECREDUCE_XOR) NODE_NAME_CASE(VECREDUCE_FADD) NODE_NAME_CASE(VECREDUCE_SEQ_FADD) + NODE_NAME_CASE(MGATHER) } // clang-format on return nullptr; @@ -4666,6 +4728,11 @@ return Result; } +bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { + EVT EltVT = VT.getVectorElementType(); + return EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32; +} + bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -81,6 +81,28 @@ (store_instr VR:$rs2, RVVBaseAddr:$rs1, m.AVL, m.SEW)>; } +def riscv_mgather : SDNode<"RISCVISD::MGATHER", + SDTypeProfile<1, 4, [SDTCisVT<2, XLenVT>]>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +multiclass VPatILoadStoreSDNode<VTypeInfo vti, int eew, VTypeInfo idx_vti> { + defvar iload_name = "PseudoVLOXEI"#eew#"_V_"#idx_vti.LMul.MX#"_"#vti.LMul.MX; + defvar iload_instr = !cast<Instruction>(iload_name); + defvar iload_instr_mask = !cast<Instruction>(iload_name # "_MASK"); + // Load + def : Pat<(vti.Vector (riscv_mgather (vti.Vector srcvalue), + RVVBaseAddr:$rs1, + (idx_vti.Vector idx_vti.Vector:$rs2), + (vti.Mask immAllOnesV))), + (iload_instr $rs1, $rs2, vti.AVL, vti.SEW)>; + def : Pat<(vti.Vector (riscv_mgather (vti.Vector GetVRegNoV0<vti.RegClass>.R:$merge), + RVVBaseAddr:$rs1, + (idx_vti.Vector idx_vti.Vector:$rs2), + (vti.Mask VMaskOp:$vm))), + (iload_instr_mask $merge, $rs1, $rs2, $vm, vti.AVL, vti.SEW)>; +} + class VPatBinarySDNode_VV; +// 7.6. Vector Indexed Instructions +foreach vti = AllVectors in { + foreach eew = EEWList in { + + defvar vlmul = vti.LMul; + defvar octuple_lmul = octuple_from_str<vlmul.MX>.ret; + defvar log_sew = shift_amount<vti.SEW>.val; + + // The data vector register group has EEW=SEW and EMUL=LMUL, while the offset + // vector register group has its EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL. + // Calculate the octuple ELMUL, which is (eew * octuple_lmul) >> log_sew. + defvar octuple_elmul = !srl(!mul(eew, octuple_lmul), log_sew); + // A legal octuple ELMUL must be greater than 0 and less than or equal to 64. + if !gt(octuple_elmul, 0) then { + if !le(octuple_elmul, 64) then { + defvar elmul_str = octuple_to_str<octuple_elmul>.ret; + //defvar elmul =!cast("V_" # elmul_str); + defvar idx_vti = !cast<VTypeInfo>("VI" # eew # elmul_str); + defm "" : VPatILoadStoreSDNode<vti, eew, idx_vti>; + } + } + + } +} + // 12.1. 
Vector Single-Width Integer Add and Subtract defm "" : VPatBinarySDNode_VV_VX_VI; defm "" : VPatBinarySDNode_VV_VX; diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -0,0 +1,2034 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 + +declare @llvm.masked.gather.nxv1i8.nxv1p0i8(, i32, , ) + +define @mgather_nxv1i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf8,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf8,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i8.nxv1p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i8.nxv2p0i8(, i32, , ) + +define @mgather_nxv2i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +define @mgather_nxv2i8_sextload_nxv2i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vsext.vf2 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vsext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i8_zextload_nxv2i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vzext.vf2 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vzext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = zext %v to + ret %ev +} + +define @mgather_nxv2i8_sextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: 
vsext.vf4 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vsext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i8_zextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vzext.vf4 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vzext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = zext %v to + ret %ev +} + +define @mgather_nxv2i8_sextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsext.vf8 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsext.vf8 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i8_zextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vzext.vf8 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vzext.vf8 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i8.nxv2p0i8( %ptrs, i32 1, %m, %passthru) + %ev = zext %v to + ret %ev +} + +declare @llvm.masked.gather.nxv4i8.nxv4p0i8(, i32, , ) + +define @mgather_nxv4i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf2,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf2,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i8.nxv4p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i8( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector 
%mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i8.nxv4p0i8( %ptrs, i32 1, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i8.nxv8p0i8(, i32, , ) + +define @mgather_nxv8i8( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,m1,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,m1,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i8.nxv8p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8(i8* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsetvli a1, zero, e8,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (a0), v28, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli a1, zero, e8,m1,tu,mu +; RV64-NEXT: vloxei64.v v9, (a0), v16, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i8, i8* %base, %idxs + %v = call @llvm.masked.gather.nxv8i8.nxv8p0i8( %ptrs, i32 1, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1i16.nxv1p0i16(, i32, , ) + +define @mgather_nxv1i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i16.nxv1p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i16.nxv2p0i16(, i32, , ) + +define @mgather_nxv2i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_nxv2i16_sextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vsext.vf2 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vsext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i16_zextload_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; 
RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vzext.vf2 v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vzext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = zext %v to + ret %ev +} + +define @mgather_nxv2i16_sextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsext.vf4 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i16_zextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vzext.vf4 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vzext.vf4 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i16.nxv2p0i16( %ptrs, i32 2, %m, %passthru) + %ev = zext %v to + ret %ev +} + +declare @llvm.masked.gather.nxv4i16.nxv4p0i16(, i32, , ) + +define @mgather_nxv4i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i16.nxv4p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i16( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i16.nxv4p0i16( %ptrs, i32 2, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i16.nxv8p0i16(, i32, , ) + +define @mgather_nxv8i16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v16 +; 
RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i16, i16* %base, %idxs + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i16, i16* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i16, i16* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i16(i16* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i16, i16* %base, %idxs + %v = call 
@llvm.masked.gather.nxv8i16.nxv8p0i16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1i32.nxv1p0i32(, i32, , ) + +define @mgather_nxv1i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i32.nxv1p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i32.nxv2p0i32(, i32, , ) + +define @mgather_nxv2i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i32.nxv2p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_nxv2i32_sextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i32_sextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsext.vf2 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i32_sextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i32.nxv2p0i32( %ptrs, i32 4, %m, %passthru) + %ev = sext %v to + ret %ev +} + +define @mgather_nxv2i32_zextload_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i32_zextload_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vzext.vf2 v26, v9 +; RV32-NEXT: vmv2r.v v8, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i32_zextload_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vzext.vf2 v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i32.nxv2p0i32( %ptrs, i32 4, %m, %passthru) + %ev = zext %v to + ret %ev +} + +declare @llvm.masked.gather.nxv4i32.nxv4p0i32(, i32, , ) + +define @mgather_nxv4i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i32( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i32: +; RV64: # 
%bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %ptrs, i32 4, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i32.nxv8p0i32(, i32, , ) + +define @mgather_nxv8i32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv4r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + 
+define @mgather_baseidx_nxv8i16_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i16_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i16_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i32(i32* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v8, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + %v = call @llvm.masked.gather.nxv8i32.nxv8p0i32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1i64.nxv1p0i64(, i32, , ) + +define 
@mgather_nxv1i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1i64.nxv1p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2i64.nxv2p0i64(, i32, , ) + +define @mgather_nxv2i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2i64.nxv2p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4i64.nxv4p0i64(, i32, , ) + +define @mgather_nxv4i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4i64.nxv4p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4i64( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4i64.nxv4p0i64( %ptrs, i32 8, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8i64.nxv8p0i64(, i32, , ) + +define @mgather_nxv8i64( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (zero), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; 
RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs 
= sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v8, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v8, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v8, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + %v = call 
@llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i64(i64* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsll.vi v8, v8, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu +; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + %v = call @llvm.masked.gather.nxv8i64.nxv8p0i64( %ptrs, i32 8, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1f16.nxv1p0f16(, i32, , ) + +define @mgather_nxv1f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1f16.nxv1p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2f16.nxv2p0f16(, i32, , ) + +define @mgather_nxv2f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2f16.nxv2p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4f16.nxv4p0f16(, i32, , ) + +define @mgather_nxv4f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4f16.nxv4p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4f16( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4f16.nxv4p0f16( %ptrs, i32 2, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8f16.nxv8p0f16(, i32, , ) + +define @mgather_nxv8f16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8f16: +; RV64: # %bb.0: +; 
RV64-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8f16.nxv8p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8f16(half* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds half, half* %base, %idxs + %v = call @llvm.masked.gather.nxv8f16.nxv8p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8f16(half* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds half, half* %base, %eidxs + %v = call @llvm.masked.gather.nxv8f16.nxv8p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8f16(half* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds half, half* %base, %eidxs + %v = call @llvm.masked.gather.nxv8f16.nxv8p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8f16(half* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (a0), v28, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,tu,mu +; RV64-NEXT: vloxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: 
vmv2r.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds half, half* %base, %idxs + %v = call @llvm.masked.gather.nxv8f16.nxv8p0f16( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv1f32.nxv1p0f32(, i32, , ) + +define @mgather_nxv1f32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1f32.nxv1p0f32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2f32.nxv2p0f32(, i32, , ) + +define @mgather_nxv2f32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2f32.nxv2p0f32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4f32.nxv4p0f32(, i32, , ) + +define @mgather_nxv4f32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv2r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4f32.nxv4p0f32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4f32( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV32-NEXT: vloxei32.v v8, (zero), v8 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV64-NEXT: vloxei64.v v8, (zero), v8 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + %v = call @llvm.masked.gather.nxv4f32.nxv4p0f32( %ptrs, i32 4, %mtrue, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8f32.nxv8p0f32(, i32, , ) + +define @mgather_nxv8f32( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv4r.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8f32.nxv8p0f32( %ptrs, i32 4, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8f32(float* %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu +; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f32: 
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf8 v16, v8
+; RV64-NEXT: vsll.vi v16, v16, 2
+; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsext.vf4 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 2
+; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf8 v16, v8
+; RV64-NEXT: vsll.vi v16, v16, 2
+; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vzext.vf4 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 2
+; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vzext.vf8 v16, v8
+; RV64-NEXT: vsll.vi v16, v16, 2
+; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsext.vf2 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 2
+; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf4 v16, v8
+; RV64-NEXT: vsll.vi v16, v16, 2
+; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsext.vf2 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 2
+; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf4 v16, v8
+; RV64-NEXT: vsll.vi v16, v16, 2
+; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vzext.vf2 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 2
+; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vzext.vf4 v16, v8
+; RV64-NEXT: vsll.vi v16, v16, 2
+; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @mgather_baseidx_nxv8f32(float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsll.vi v28, v8, 2
+; RV32-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV32-NEXT: vloxei32.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf2 v16, v8
+; RV64-NEXT: vsll.vi v16, v16, 2
+; RV64-NEXT: vsetvli a1, zero, e32,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
+  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
+  ret <vscale x 8 x float> %v
+}
+
+declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*>, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
+
+define <vscale x 1 x double> @mgather_nxv1f64(<vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru) {
+; RV32-LABEL: mgather_nxv1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64,m1,tu,mu
+; RV32-NEXT: vloxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv1f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64,m1,tu,mu
+; RV64-NEXT: vloxei64.v v9, (zero), v8, v0.t
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
+  %v = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru)
+  ret <vscale x 1 x double> %v
+}
+
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+define <vscale x 2 x double> @mgather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru) {
+; RV32-LABEL: mgather_nxv2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64,m2,tu,mu
+; RV32-NEXT: vloxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT: vmv2r.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64,m2,tu,mu
+; RV64-NEXT: vloxei64.v v10, (zero), v8, v0.t
+; RV64-NEXT: vmv2r.v v8, v10
+; RV64-NEXT: ret
+  %v = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru)
+  ret <vscale x 2 x double> %v
+}
+
+declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
+
+define <vscale x 4 x double> @mgather_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru) {
+; RV32-LABEL: mgather_nxv4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64,m4,tu,mu
+; RV32-NEXT: vloxei32.v v12, (zero), v8, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64,m4,tu,mu
+; RV64-NEXT: vloxei64.v v12, (zero), v8, v0.t
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru)
+  ret <vscale x 4 x double> %v
+}
+
+define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x double> %passthru) {
+; RV32-LABEL: mgather_truemask_nxv4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: vloxei32.v v8, (zero), v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_truemask_nxv4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV64-NEXT: vloxei64.v v8, (zero), v8
+; RV64-NEXT: ret
+  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x double> %passthru)
+  ret <vscale x 4 x double> %v
+}
+
+declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*>, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
+
+define <vscale x 8 x double> @mgather_nxv8f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (zero), v8, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (zero), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsext.vf4 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf8 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsext.vf4 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf8 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vzext.vf4 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vzext.vf8 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsext.vf2 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf4 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsext.vf2 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf4 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vzext.vf2 v28, v8
+; RV32-NEXT: vsll.vi v28, v28, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vzext.vf4 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsll.vi v28, v8, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf2 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsll.vi v28, v8, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsext.vf2 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; RV32-NEXT: vsll.vi v28, v8, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vzext.vf2 v24, v8
+; RV64-NEXT: vsll.vi v8, v24, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @mgather_baseidx_nxv8f64(double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT: vsll.vi v8, v8, 3
+; RV32-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV32-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT: vsll.vi v8, v8, 3
+; RV64-NEXT: vsetvli a1, zero, e64,m8,tu,mu
+; RV64-NEXT: vloxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
+  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
+  ret <vscale x 8 x double> %v
+}