diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -936,6 +936,7 @@
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
   SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
+  SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
   SDValue WidenVecRes_ScalarOp(SDNode* N);
   SDValue WidenVecRes_Select(SDNode *N);
   SDValue WidenVSELECTMask(SDNode *N);
@@ -972,6 +973,7 @@
   SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
+  SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_SETCC(SDNode* N);
   SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
   SDValue WidenVecOp_VSELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3249,6 +3249,9 @@
   case ISD::MGATHER:
     Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
     break;
+  case ISD::VP_GATHER:
+    Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N));
+    break;
 
   case ISD::ADD: case ISD::VP_ADD:
   case ISD::AND: case ISD::VP_AND:
@@ -4486,6 +4489,29 @@
   return Res;
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Mask = N->getMask();
+  SDValue Scale = N->getScale();
+  ElementCount WideEC = WideVT.getVectorElementCount();
+  SDLoc dl(N);
+
+  SDValue Index = GetWidenedVector(N->getIndex());
+  EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+                                   N->getMemoryVT().getScalarType(), WideEC);
+  Mask = GetWidenedMask(Mask, WideEC);
+
+  SDValue Ops[] = {N->getChain(), N->getBasePtr(), Index, Scale,
+                   Mask, N->getVectorLength()};
+  SDValue Res = DAG.getGatherVP(DAG.getVTList(WideVT, MVT::Other), WideMemVT,
+                                dl, Ops, N->getMemOperand(), N->getIndexType());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
@@ -4914,6 +4940,7 @@
   case ISD::MSTORE:             Res = WidenVecOp_MSTORE(N, OpNo); break;
   case ISD::MGATHER:            Res = WidenVecOp_MGATHER(N, OpNo); break;
   case ISD::MSCATTER:           Res = WidenVecOp_MSCATTER(N, OpNo); break;
+  case ISD::VP_SCATTER:         Res = WidenVecOp_VP_SCATTER(N, OpNo); break;
   case ISD::SETCC:              Res = WidenVecOp_SETCC(N); break;
   case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS:     Res = WidenVecOp_STRICT_FSETCC(N); break;
@@ -5458,6 +5485,34 @@
                               MSC->isTruncatingStore());
 }
 
+SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) {
+  VPScatterSDNode *VPSC = cast<VPScatterSDNode>(N);
+  SDValue DataOp = VPSC->getValue();
+  SDValue Mask = VPSC->getMask();
+  SDValue Index = VPSC->getIndex();
+  SDValue Scale = VPSC->getScale();
+  EVT WideMemVT = VPSC->getMemoryVT();
+
+  if (OpNo == 1) {
+    DataOp = GetWidenedVector(DataOp);
+    Index = GetWidenedVector(Index);
+    const auto WideEC = DataOp.getValueType().getVectorElementCount();
+    Mask = GetWidenedMask(Mask, WideEC);
+    WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+                                 VPSC->getMemoryVT().getScalarType(), WideEC);
+  } else if (OpNo == 3) {
+    // Just widen the index. It's allowed to have extra elements.
+    Index = GetWidenedVector(Index);
+  } else
+    llvm_unreachable("Can't widen this operand of VP_SCATTER");
+
+  SDValue Ops[] = {
+      VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask,
+      VPSC->getVectorLength()};
+  return DAG.getScatterVP(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), Ops,
+                          VPSC->getMemOperand(), VPSC->getIndexType());
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
   SDValue InOp0 = GetWidenedVector(N->getOperand(0));
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -150,6 +150,46 @@
   ret <2 x i64> %ev
 }
 
+declare <3 x i8> @llvm.vp.gather.v3i8.v3p0i8(<3 x i8*>, <3 x i1>, i32)
+
+define <3 x i8> @vpgather_v3i8(<3 x i8*> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_v3i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_v3i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    ret
+  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0i8(<3 x i8*> %ptrs, <3 x i1> %m, i32 %evl)
+  ret <3 x i8> %v
+}
+
+define <3 x i8> @vpgather_truemask_v3i8(<3 x i8*> %ptrs, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_truemask_v3i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; RV32-NEXT:    vluxei32.v v9, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_truemask_v3i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; RV64-NEXT:    vluxei64.v v10, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    ret
+  %mhead = insertelement <3 x i1> undef, i1 1, i32 0
+  %mtrue = shufflevector <3 x i1> %mhead, <3 x i1> undef, <3 x i32> zeroinitializer
+  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0i8(<3 x i8*> %ptrs, <3 x i1> %mtrue, i32 %evl)
+  ret <3 x i8> %v
+}
+
 declare <4 x i8> @llvm.vp.gather.v4i8.v4p0i8(<4 x i8*>, <4 x i1>, i32)
 
 define <4 x i8> @vpgather_v4i8(<4 x i8*> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -236,6 +236,42 @@
   ret void
 }
 
+declare void @llvm.vp.scatter.v3i16.v3p0i16(<3 x i16>, <3 x i16*>, <3 x i1>, i32)
+
+define void @vpscatter_v3i16(<3 x i16> %val, <3 x i16*> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_v3i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_v3i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
+; RV64-NEXT:    ret
+  call void @llvm.vp.scatter.v3i16.v3p0i16(<3 x i16> %val, <3 x i16*> %ptrs, <3 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_truemask_v3i16(<3 x i16> %val, <3 x i16*> %ptrs, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_truemask_v3i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; RV32-NEXT:    vsoxei32.v v8, (zero), v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_truemask_v3i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; RV64-NEXT:    vsoxei64.v v8, (zero), v10
+; RV64-NEXT:    ret
+  %mhead = insertelement <3 x i1> undef, i1 1, i32 0
+  %mtrue = shufflevector <3 x i1> %mhead, <3 x i1> undef, <3 x i32> zeroinitializer
+  call void @llvm.vp.scatter.v3i16.v3p0i16(<3 x i16> %val, <3 x i16*> %ptrs, <3 x i1> %mtrue, i32 %evl)
+  ret void
+}
+
 declare void @llvm.vp.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, <4 x i1>, i32)
 
 define void @vpscatter_v4i16(<4 x i16> %val, <4 x i16*> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -1701,6 +1701,272 @@
   ret <vscale x 4 x double> %v
 }
 
+declare <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*>, <vscale x 6 x i1>, i32)
+
+define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_nxv6f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_nxv6f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
+; RV64-NEXT:    ret
+  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
+  ret <vscale x 6 x double> %v
+}
+
+define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vsext.vf4 v12, v8
+; RV32-NEXT:    vsll.vi v16, v12, 3
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i8> %idxs
+  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(
%ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_sext_nxv6i8_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsext.vf8 v16, v8 +; RV32-NEXT: vsll.vi v8, v16, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_zext_nxv6i8_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vzext.vf8 v16, v8 +; RV32-NEXT: vsll.vi v8, v16, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vzext.vf8 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv6i16_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v12, v8 +; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_sext_nxv6i16_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsext.vf4 v16, v8 +; RV32-NEXT: vsll.vi v8, v16, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr 
inbounds double, double* %base, %eidxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_zext_nxv6i16_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vzext.vf4 v16, v8 +; RV32-NEXT: vsll.vi v8, v16, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vzext.vf4 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv6i32_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_sext_nxv6i32_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsext.vf2 v16, v8 +; RV32-NEXT: vsll.vi v8, v16, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_zext_nxv6i32_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vzext.vf2 v16, v8 +; RV32-NEXT: vsll.vi v8, v16, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vzext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: 
ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv6f64(double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsll.vi v8, v8, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v16, v8, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + %v = call @llvm.vp.gather.nxv6f64.nxv6p0f64( %ptrs, %m, i32 %evl) + ret %v +} + declare @llvm.vp.gather.nxv8f64.nxv8p0f64(, , i32) define @vpgather_nxv8f64( %ptrs, %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -1542,6 +1542,271 @@ ret void } +declare void @llvm.vp.scatter.nxv6f64.nxv6p0f64(, , , i32) + +define void @vpscatter_nxv6f64( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv6i8_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf4 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsext.vf8 v24, v16 +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs 
to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vzext.vf8 v24, v16 +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vzext.vf8 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv6i16_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsext.vf2 v20, v16 +; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsext.vf4 v24, v16 +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vzext.vf4 v24, v16 +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 
+; RV64-NEXT: vzext.vf4 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv6i32_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds double, double* %base, %idxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsext.vf2 v24, v16 +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vzext.vf2 v24, v16 +; RV32-NEXT: vsll.vi v16, v24, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vzext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds double, double* %base, %eidxs + call void @llvm.vp.scatter.nxv6f64.nxv6p0f64( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv6f64( %val, double* %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv6f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vnsrl.wi v24, v16, 0 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: ret +; +; 
RV64-LABEL: vpscatter_baseidx_nxv6f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %idxs
+  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, <vscale x 8 x i1>, i32)
 
 define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {