diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -171,6 +171,7 @@
   VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
   VSE_VL,
   MGATHER,
+  MSCATTER,
   // WARNING: Do not add anything in the end unless you want the node to
   // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -391,7 +392,7 @@
   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                             unsigned NewOpc) const;
-  SDValue lowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerMGATHERMSCATTER(SDValue Op, SelectionDAG &DAG) const;
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -466,6 +466,7 @@
       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
 
       setOperationAction(ISD::MGATHER, VT, Custom);
+      setOperationAction(ISD::MSCATTER, VT, Custom);
     }
 
     // Expand various CCs to best match the RVV ISA, which natively supports UNE
@@ -501,6 +502,7 @@
       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
 
       setOperationAction(ISD::MGATHER, VT, Custom);
+      setOperationAction(ISD::MSCATTER, VT, Custom);
     };
 
     if (Subtarget.hasStdExtZfh())
@@ -1149,7 +1151,8 @@
   case ISD::FMA:
     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
   case ISD::MGATHER:
-    return lowerMGATHER(Op, DAG);
+  case ISD::MSCATTER:
+    return lowerMGATHERMSCATTER(Op, DAG);
   }
 }
 
@@ -2058,21 +2061,18 @@
 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
 // indexing is extended to the XLEN value type and scaled accordingly.
-SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
-  MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
+SDValue RISCVTargetLowering::lowerMGATHERMSCATTER(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  auto *N = cast<MaskedGatherScatterSDNode>(Op.getNode());
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
   SDValue Index = N->getIndex();
   SDValue Mask = N->getMask();
-  SDValue PassThru = N->getPassThru();
   MVT IndexVT = Index.getSimpleValueType();
   MVT XLenVT = Subtarget.getXLenVT();
 
   assert(N->getBasePtr().getSimpleValueType() == XLenVT &&
          "Unexpected pointer type");
-  // Targets have to explicitly opt-in for extending vector loads.
-  assert(N->getExtensionType() == ISD::NON_EXTLOAD &&
-         "Unexpected extending MGATHER");
 
   // RISCV indexed loads only support the "unsigned unscaled" addressing mode,
   // so anything else must be manually legalized.
@@ -2102,9 +2102,21 @@
     }
   }
 
-  SDValue Ops[] = {N->getChain(), PassThru, N->getBasePtr(), Index, Mask};
-  return DAG.getMemIntrinsicNode(RISCVISD::MGATHER, DL,
-                                 DAG.getVTList(VT, MVT::Other), Ops,
+  if (auto *MG = dyn_cast<MaskedGatherSDNode>(N)) {
+    // Targets have to explicitly opt-in for extending vector loads.
+    assert(MG->getExtensionType() == ISD::NON_EXTLOAD &&
+           "Unexpected extending MGATHER");
+    SDValue PassThru = MG->getPassThru();
+    SDValue Ops[] = {N->getChain(), PassThru, N->getBasePtr(), Index, Mask};
+    return DAG.getMemIntrinsicNode(RISCVISD::MGATHER, DL,
+                                   DAG.getVTList(VT, MVT::Other), Ops,
+                                   N->getMemoryVT(), N->getMemOperand());
+  }
+
+  SDValue Ops[] = {N->getChain(), cast<MaskedScatterSDNode>(N)->getValue(),
+                   Mask, N->getBasePtr(), Index};
+  return DAG.getMemIntrinsicNode(RISCVISD::MSCATTER, DL,
+                                 DAG.getVTList(MVT::Other), Ops,
                                  N->getMemoryVT(), N->getMemOperand());
 }
 
@@ -4702,6 +4714,7 @@
   NODE_NAME_CASE(VLE_VL)
   NODE_NAME_CASE(VSE_VL)
   NODE_NAME_CASE(MGATHER)
+  NODE_NAME_CASE(MSCATTER)
   }
   // clang-format on
   return nullptr;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -106,12 +106,17 @@
 def riscv_mgather : SDNode<"RISCVISD::MGATHER",
                            SDTypeProfile<1, 4, [SDTCisVT<2, XLenVT>]>,
                            [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def riscv_mscatter : SDNode<"RISCVISD::MSCATTER",
+                            SDTypeProfile<0, 4, [SDTCisVT<2, XLenVT>]>,
+                            [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 multiclass VPatILoadStoreSDNode {
-  defvar iload_name = "PseudoVLOXEI"#eew#"_V_"#idx_vti.LMul.MX#"_"#vti.LMul.MX;
-  defvar iload_instr = !cast<Instruction>(iload_name);
-  defvar iload_instr_mask = !cast<Instruction>(iload_name # "_MASK");
+  defvar suffix = eew#"_V_"#idx_vti.LMul.MX#"_"#vti.LMul.MX;
+  defvar iload_instr = !cast<Instruction>("PseudoVLOXEI"#suffix);
+  defvar iload_instr_mask = !cast<Instruction>("PseudoVLOXEI"#suffix#"_MASK");
+  defvar istore_instr = !cast<Instruction>("PseudoVSOXEI"#suffix);
+  defvar istore_instr_mask = !cast<Instruction>("PseudoVSOXEI"#suffix#"_MASK");
   // Load
   def : Pat<(vti.Vector (riscv_mgather (vti.Vector srcvalue),
                                        RVVBaseAddr:$rs1,
@@ -123,6 +128,17 @@
                          (idx_vti.Vector idx_vti.Vector:$rs2),
                          (vti.Mask VMaskOp:$vm))),
             (iload_instr_mask $merge, $rs1, $rs2, $vm, vti.AVL, vti.SEW)>;
+  // Store
+  def : Pat<(riscv_mscatter (vti.Vector vti.Vector:$rs3),
+                            (vti.Mask immAllOnesV),
+                            RVVBaseAddr:$rs1,
+                            (idx_vti.Vector idx_vti.Vector:$rs2)),
+            (istore_instr $rs3, $rs1, $rs2, vti.AVL, vti.SEW)>;
+  def : Pat<(riscv_mscatter (vti.Vector vti.Vector:$rs3),
+                            (vti.Mask VMaskOp:$vm),
+                            RVVBaseAddr:$rs1,
+                            (idx_vti.Vector idx_vti.Vector:$rs2)),
+            (istore_instr_mask $rs3, $rs1, $rs2, $vm, vti.AVL, vti.SEW)>;
 }
 
 class VPatBinarySDNode_VV<

+declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)
+
+define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m) {
+; RV32-LABEL: mscatter_nxv1i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mscatter_nxv1i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64-NEXT: ret
+  call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m)
+  ret void
+}
+
+declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
+
+define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
+; RV32-LABEL: mscatter_nxv2i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mscatter_nxv2i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64-NEXT: ret
+  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
+  ret void
+}
+
+define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
+; RV32-LABEL:
mscatter_nxv2i16_truncstore_nxv2i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV32-NEXT: vnsrl.wi v25, v8, 0 +; RV32-NEXT: vsoxei32.v v25, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV64-NEXT: vnsrl.wi v25, v8, 0 +; RV64-NEXT: vsoxei64.v v25, (zero), v10, v0.t +; RV64-NEXT: ret + %tval = trunc %val to + call void @llvm.masked.scatter.nxv2i8.nxv2p0i8( %tval, %ptrs, i32 1, %m) + ret void +} + +define void @mscatter_nxv2i32_truncstore_nxv2i8( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vnsrl.wi v25, v8, 0 +; RV32-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV32-NEXT: vnsrl.wi v26, v25, 0 +; RV32-NEXT: vsoxei32.v v26, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vnsrl.wi v25, v8, 0 +; RV64-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV64-NEXT: vnsrl.wi v26, v25, 0 +; RV64-NEXT: vsoxei64.v v26, (zero), v10, v0.t +; RV64-NEXT: ret + %tval = trunc %val to + call void @llvm.masked.scatter.nxv2i8.nxv2p0i8( %tval, %ptrs, i32 1, %m) + ret void +} + +define void @mscatter_nxv2i64_truncstore_nxv2i8( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vnsrl.wi v25, v8, 0 +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vnsrl.wi v26, v25, 0 +; RV32-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV32-NEXT: vnsrl.wi v25, v26, 0 +; RV32-NEXT: vsoxei32.v v25, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vnsrl.wi v25, v8, 0 +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vnsrl.wi v26, v25, 0 +; RV64-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV64-NEXT: vnsrl.wi v25, v26, 0 +; RV64-NEXT: vsoxei64.v v25, (zero), v10, v0.t +; RV64-NEXT: ret + %tval = trunc %val to + call void @llvm.masked.scatter.nxv2i8.nxv2p0i8( %tval, %ptrs, i32 1, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(, , i32, ) + +define void @mscatter_nxv4i8( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4i8.nxv4p0i8( %val, %ptrs, i32 1, %m) + ret void +} + +define void @mscatter_truemask_nxv4i8( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + call void @llvm.masked.scatter.nxv4i8.nxv4p0i8( %val, %ptrs, i32 1, %mtrue) + ret void +} + +declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(, , i32, ) + +define void @mscatter_nxv8i8( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, 
e8,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8i8.nxv8p0i8( %val, %ptrs, i32 1, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8( %val, i8* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v9 +; RV32-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v9 +; RV64-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i8, i8* %base, %idxs + call void @llvm.masked.scatter.nxv8i8.nxv8p0i8( %val, %ptrs, i32 1, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(, , i32, ) + +define void @mscatter_nxv1i16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1i16.nxv1p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(, , i32, ) + +define void @mscatter_nxv2i16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2i16.nxv2p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_nxv2i32_truncstore_nxv2i16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vnsrl.wi v25, v8, 0 +; RV32-NEXT: vsoxei32.v v25, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vnsrl.wi v25, v8, 0 +; RV64-NEXT: vsoxei64.v v25, (zero), v10, v0.t +; RV64-NEXT: ret + %tval = trunc %val to + call void @llvm.masked.scatter.nxv2i16.nxv2p0i16( %tval, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_nxv2i64_truncstore_nxv2i16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vnsrl.wi v25, v8, 0 +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vnsrl.wi v26, v25, 0 +; RV32-NEXT: vsoxei32.v v26, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vnsrl.wi v25, v8, 0 +; RV64-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV64-NEXT: vnsrl.wi v26, v25, 0 +; RV64-NEXT: vsoxei64.v v26, (zero), v10, v0.t +; RV64-NEXT: ret + %tval = trunc %val to + call void @llvm.masked.scatter.nxv2i16.nxv2p0i16( %tval, %ptrs, i32 2, %m) + ret void +} + +declare void 
@llvm.masked.scatter.nxv4i16.nxv4p0i16(, , i32, ) + +define void @mscatter_nxv4i16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_truemask_nxv4i16( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( %val, %ptrs, i32 2, %mtrue) + ret void +} + +declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(, , i32, ) + +define void @mscatter_nxv8i16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8i16( %val, i16* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i16, i16* %base, %idxs + call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8i16( %val, i16* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i16, i16* %base, %eidxs + call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8i16( %val, i16* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; 
RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i16, i16* %base, %eidxs + call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i16( %val, i16* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i16, i16* %base, %idxs + call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(, , i32, ) + +define void @mscatter_nxv1i32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1i32.nxv1p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(, , i32, ) + +define void @mscatter_nxv2i32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2i32.nxv2p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_nxv2i64_truncstore_nxv2i32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vnsrl.wi v25, v8, 0 +; RV32-NEXT: vsoxei32.v v25, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vnsrl.wi v25, v8, 0 +; RV64-NEXT: vsoxei64.v v25, (zero), v10, v0.t +; RV64-NEXT: ret + %tval = trunc %val to + call void @llvm.masked.scatter.nxv2i32.nxv2p0i32( %tval, %ptrs, i32 4, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(, , i32, ) + +define void @mscatter_nxv4i32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %val, %ptrs, i32 4, 
%m) + ret void +} + +define void @mscatter_truemask_nxv4i32( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %val, %ptrs, i32 4, %mtrue) + ret void +} + +declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(, , i32, ) + +define void @mscatter_nxv8i32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8i32( %val, i32* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8i32( %val, i32* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8i32( %val, i32* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i16_nxv8i32( %val, i32* %base, 
%idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i16_nxv8i32( %val, i32* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i16_nxv8i32( %val, i32* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf2 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i32, i32* %base, %eidxs + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i32( %val, i32* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v12, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i32, i32* %base, %idxs + call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( %val, %ptrs, i32 4, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(, , i32, ) + +define void @mscatter_nxv1i64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +declare void 
@llvm.masked.scatter.nxv2i64.nxv2p0i64(, , i32, ) + +define void @mscatter_nxv2i64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2i64.nxv2p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(, , i32, ) + +define void @mscatter_nxv4i64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4i64.nxv4p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_truemask_nxv4i64( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + call void @llvm.masked.scatter.nxv4i64.nxv4p0i64( %val, %ptrs, i32 8, %mtrue) + ret void +} + +declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(, , i32, ) + +define void @mscatter_nxv8i64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + 
%eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i16_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i16_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i16_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf2 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i32_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v16, 3 
+; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i32_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v16, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i32_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v16, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds i64, i64* %base, %eidxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i64( %val, i64* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds i64, i64* %base, %idxs + call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( %val, %ptrs, i32 8, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(, , i32, ) + +define void @mscatter_nxv1f16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1f16.nxv1p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(, , i32, ) + +define void @mscatter_nxv2f16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, 
zero, e16,mf2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2f16.nxv2p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(, , i32, ) + +define void @mscatter_nxv4f16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4f16.nxv4p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_truemask_nxv4f16( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + call void @llvm.masked.scatter.nxv4f16.nxv4p0f16( %val, %ptrs, i32 2, %mtrue) + ret void +} + +declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(, , i32, ) + +define void @mscatter_nxv8f16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8f16( %val, half* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds half, half* %base, %idxs + call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8f16( %val, half* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds half, half* %base, %eidxs + call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8f16( %val, half* %base, %idxs, 
%m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds half, half* %base, %eidxs + call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8f16( %val, half* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v10 +; RV32-NEXT: vsll.vi v28, v28, 1 +; RV32-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vsll.vi v16, v16, 1 +; RV64-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds half, half* %base, %idxs + call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(, , i32, ) + +define void @mscatter_nxv1f32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1f32.nxv1p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(, , i32, ) + +define void @mscatter_nxv2f32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(, , i32, ) + +define void @mscatter_nxv4f32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_truemask_nxv4f32( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + %mhead = 
insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( %val, %ptrs, i32 4, %mtrue) + ret void +} + +declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(, , i32, ) + +define void @mscatter_nxv8f32( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8f32( %val, float* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds float, float* %base, %idxs + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8f32( %val, float* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds float, float* %base, %eidxs + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8f32( %val, float* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds float, float* %base, %eidxs + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i16_nxv8f32( %val, float* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, 
v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds float, float* %base, %idxs + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i16_nxv8f32( %val, float* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds float, float* %base, %eidxs + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i16_nxv8f32( %val, float* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf2 v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds float, float* %base, %eidxs + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +define void @mscatter_baseidx_nxv8f32( %val, float* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v12, 2 +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v16, v12 +; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds float, float* %base, %idxs + call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( %val, %ptrs, i32 4, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(, , i32, ) + +define void @mscatter_nxv1f64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1f64.nxv1p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(, , i32, ) + +define void @mscatter_nxv2f64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vsoxei64.v 
v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(, , i32, ) + +define void @mscatter_nxv4f64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_truemask_nxv4f64( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + %mhead = insertelement undef, i1 1, i32 0 + %mtrue = shufflevector %mhead, undef, zeroinitializer + call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( %val, %ptrs, i32 8, %mtrue) + ret void +} + +declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(, , i32, ) + +define void @mscatter_nxv8f64( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf4 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, 
e32,m4,ta,mu +; RV32-NEXT: vzext.vf4 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i16_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i16_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsext.vf2 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i16_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vzext.vf2 v28, v16 +; RV32-NEXT: vsll.vi v28, v28, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i32_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v16, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, 
(a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i32_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v16, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i32_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; RV32-NEXT: vsll.vi v28, v16, 3 +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vzext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +} + +define void @mscatter_baseidx_nxv8f64( %val, double* %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( %val, %ptrs, i32 8, %m) + ret void +}