diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -947,6 +947,7 @@ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); + SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N); @@ -984,6 +985,7 @@ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo); + SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3687,6 +3687,9 @@ case ISD::VP_LOAD: Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N)); break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N)); + break; case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); break; @@ -4919,6 +4922,33 @@ return Res; } +SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) { + SDLoc DL(N); + + // The mask should be widened as well + SDValue Mask = N->getMask(); + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided load"); + Mask = GetWidenedVector(Mask); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(Mask.getValueType().getVectorElementCount() == + WidenVT.getVectorElementCount() && + "Data and 
mask vectors should have the same number of elements"); + + SDValue Res = DAG.getStridedLoadVP( + N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(), + N->getBasePtr(), N->getOffset(), N->getStride(), Mask, + N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(), + N->isExpandingLoad()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); @@ -5436,6 +5466,9 @@ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo); + break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; @@ -5914,6 +5947,38 @@ ST->isCompressingStore()); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N, + unsigned OpNo) { + assert((OpNo == 1 || OpNo == 4) && + "Can widen only data or mask operand of vp_strided_store"); + VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N); + SDValue Mask = SST->getMask(); + SDValue StVal = SST->getValue(); + SDLoc DL(N); + + if (OpNo == 1) + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided store"); + else + assert(getTypeAction(StVal.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided store"); + + StVal = GetWidenedVector(StVal); + Mask = GetWidenedVector(Mask); + + assert(StVal.getValueType().getVectorElementCount() == + Mask.getValueType().getVectorElementCount() && + "Data and mask vectors should have the same number of 
elements"); + + return DAG.getStridedStoreVP( + SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(), + SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(), + SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(), + SST->isCompressingStore()); +} + SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of mstore"); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -563,3 +563,40 @@ %load = call <8 x double> @llvm.experimental.vp.strided.load.v8f64.p0f64.i32(double* %ptr, i32 %stride, <8 x i1> %m, i32 %evl) ret <8 x double> %load } + +; Widening +define <3 x double> @strided_vpload_v3f64(double* %ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpload_v3f64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpload_v3f64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t +; CHECK-RV64-NEXT: ret + %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %mask, i32 %evl) + ret <3 x double> %v +} + +define <3 x double> @strided_vpload_v3f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpload_v3f64_allones_mask: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpload_v3f64_allones_mask: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli zero, a2, 
e64, m2, ta, mu +; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1 +; CHECK-RV64-NEXT: ret + %one = insertelement <3 x i1> poison, i1 true, i32 0 + %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer + %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %allones, i32 %evl) + ret <3 x double> %v +} + +declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double*, i32, <3 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll @@ -455,3 +455,40 @@ call void @llvm.experimental.vp.strided.store.v2i8.p0i8.i32(<2 x i8> %val, i8* %ptr, i32 %stride, <2 x i1> %b, i32 %evl) ret void } + +; Widening +define void @strided_vpstore_v3f32(<3 x float> %v, float *%ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpstore_v3f32: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-RV32-NEXT: vsse32.v v8, (a0), a1, v0.t +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpstore_v3f32: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-RV64-NEXT: vsse32.v v8, (a0), a1, v0.t +; CHECK-RV64-NEXT: ret + call void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float> %v, float* %ptr, i32 %stride, <3 x i1> %mask, i32 %evl) + ret void +} + +define void @strided_vpstore_v3f32_allones_mask(<3 x float> %v, float *%ptr, i32 signext %stride, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpstore_v3f32_allones_mask: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-RV32-NEXT: vsse32.v v8, (a0), a1 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpstore_v3f32_allones_mask: +; CHECK-RV64: # %bb.0: +; 
CHECK-RV64-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-RV64-NEXT: vsse32.v v8, (a0), a1 +; CHECK-RV64-NEXT: ret + %one = insertelement <3 x i1> poison, i1 true, i32 0 + %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer + call void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float> %v, float* %ptr, i32 %stride, <3 x i1> %allones, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float>, float* , i32, <3 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -723,3 +723,40 @@ %load = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.p0f64.i32(double* %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x double> %load } + +; Widening +define <vscale x 3 x double> @strided_vpload_nxv3f64(double* %ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpload_nxv3f64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpload_nxv3f64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t +; CHECK-RV64-NEXT: ret + %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double* %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl) + ret <vscale x 3 x double> %v +} + +define <vscale x 3 x double> @strided_vpload_nxv3f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpload_nxv3f64_allones_mask: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpload_nxv3f64_allones_mask: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1 +; 
CHECK-RV64-NEXT: ret + %one = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %allones = shufflevector <vscale x 3 x i1> %one, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double* %ptr, i32 %stride, <vscale x 3 x i1> %allones, i32 %evl) + ret <vscale x 3 x double> %v +} + +declare <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double*, i32, <vscale x 3 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -579,3 +579,40 @@ call void @llvm.experimental.vp.strided.store.nxv1i8.p0i8.i32(<vscale x 1 x i8> %val, i8* %ptr, i32 %strided, <vscale x 1 x i1> %b, i32 %evl) ret void } + +; Widening +define void @strided_vpstore_nxv3f32(<vscale x 3 x float> %v, float *%ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpstore_nxv3f32: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-RV32-NEXT: vsse32.v v8, (a0), a1, v0.t +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpstore_nxv3f32: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-RV64-NEXT: vsse32.v v8, (a0), a1, v0.t +; CHECK-RV64-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float> %v, float* %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl) + ret void +} + +define void @strided_vpstore_nxv3f32_allones_mask(<vscale x 3 x float> %v, float *%ptr, i32 signext %stride, i32 zeroext %evl) { +; CHECK-RV32-LABEL: strided_vpstore_nxv3f32_allones_mask: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-RV32-NEXT: vsse32.v v8, (a0), a1 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: strided_vpstore_nxv3f32_allones_mask: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-RV64-NEXT: vsse32.v v8, (a0), a1 +; CHECK-RV64-NEXT: ret + %one = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %allones = shufflevector <vscale x 3 x i1> %one, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + call void 
@llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float> %v, float* %ptr, i32 %stride, <vscale x 3 x i1> %allones, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float>, float* , i32, <vscale x 3 x i1>, i32)