diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15245,9 +15245,9 @@
         DAG.getNode(ISD::SHL, DL, MVT::i64, ExtLength,
                     DAG.getConstant(56 + countTrailingZeros(EltBytes), DL,
                                     getPointerTy(DAG.getDataLayout())));
-    DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
-                           N->getOperand(2), ShiftedLength, N->getOperand(4));
-    return SDValue(N, 0);
+    SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+    NewOps[4] = ShiftedLength;
+    return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
     break;
   }
   case ISD::VP_STORE: {
@@ -15272,9 +15272,9 @@
         DAG.getNode(ISD::SHL, DL, MVT::i64, ExtLength,
                     DAG.getConstant(56 + countTrailingZeros(EltBytes), DL,
                                     getPointerTy(DAG.getDataLayout())));
-    DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
-                           N->getOperand(2), N->getOperand(3), ShiftedLength);
-    return SDValue(N, 0);
+    SmallVector<SDValue, 6> NewOps(N->op_begin(), N->op_end());
+    NewOps[5] = ShiftedLength;
+    return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
     break;
   }
   case ISD::INTRINSIC_WO_CHAIN: {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2434,6 +2434,32 @@
   dag MRGSGT = (v2i64 (XXPERMDI (XXSPLTW EQWSHAND, 0),
                                 (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
 }
+def SDTVPLoad: SDTypeProfile<1, 4, [
+  SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisSameNumEltsAs<0, 3>, SDTCisInt<3>, SDTCisInt<4>
+]>;
+def SDTVPStore: SDTypeProfile<0, 5, [
+  SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisSameNumEltsAs<0, 3>, SDTCisInt<3>, SDTCisInt<4>
+]>;
+def vp_load : SDNode<"ISD::VP_LOAD", SDTVPLoad,
+                     [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def vp_store : SDNode<"ISD::VP_STORE", SDTVPStore,
+                      [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def load_vl : PatFrags<(ops node:$src1, node:$src2), [
+    (vp_load node:$src1, undef, undef, node:$src2),
+    (vp_load node:$src1, undef, immAllZerosV, node:$src2)
+  ], [{
+  return !cast<VPLoadSDNode>(N)->isExpandingLoad() &&
+         cast<VPLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+         cast<VPLoadSDNode>(N)->isUnindexed();
+}]>;
+def store_vl : PatFrags<(ops node:$src1, node:$src2, node:$src3), [
+    (vp_store node:$src1, node:$src2, undef, undef, node:$src3),
+    (vp_store node:$src1, node:$src2, undef, immAllZerosV, node:$src3)
+  ], [{
+  return !cast<VPStoreSDNode>(N)->isTruncatingStore() &&
+         cast<VPStoreSDNode>(N)->isUnindexed();
+}]>;
+
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
 // instruction availability in the architecture. For example, VSX came in with
@@ -3861,6 +3887,21 @@
 
 def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, XForm:$dst),
           (STXVX $rS, XForm:$dst)>;
+// ld/st-with-length patterns
+foreach vt = [ v2i64, v4i32, v2f64, v4f32, ] in {
+  def : Pat<(!cast<ValueType>(""#vt) (load_vl addr:$src, i64:$rB)),
+            (LXVL $src, $rB)>;
+  def : Pat<(store_vl !cast<ValueType>(""#vt):$rS, addr:$dst, i64:$rB),
+            (STXVL $rS, $dst, $rB)>;
+}
+// We have to do v8i16 and v16i8 separately because they are not in VSRC.
+foreach vt = [ v8i16, v16i8, ] in {
+  def : Pat<(!cast<ValueType>(""#vt) (load_vl addr:$src, i64:$rB)),
+            (COPY_TO_REGCLASS (LXVL $src, $rB), VRRC)>;
+  def : Pat<(store_vl !cast<ValueType>(""#vt):$rS, addr:$dst, i64:$rB),
+            (STXVL (COPY_TO_REGCLASS $rS, VSRC), $dst, $rB)>;
+}
+
 // Build vectors from i8 loads
 defm : ScalToVecWPermute<v8i16, ScalarLoads.ZELi8,
                          (VSPLTHs 3, (LXSIBZX ForceXForm:$src)),
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1345,6 +1345,9 @@
 PPCTTIImpl::MemIntrinsicLegality
 PPCTTIImpl::getVPMemIntrinsicLegality(const VPIntrinsic &PI) const {
 
+  if (!isa<Constant>(PI.getMaskParam()))
+    return Illegal;
+
   switch (PI.getIntrinsicID()) {
   default:
     return Illegal;
@@ -1354,8 +1357,10 @@
     // therefore cannot be used in 32-bit mode.
     if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
       return Illegal;
-    Type *DataType = PI.getMemoryDataParam()->getType();
-    if (auto *VecTy = dyn_cast<FixedVectorType>(DataType)) {
+    Type *DataType = PI.getIntrinsicID() == Intrinsic::vp_load
+                         ? PI.getType()
+                         : PI.getMemoryDataParam()->getType();
+    if (isa<FixedVectorType>(DataType)) {
       unsigned VecWidth = DataType->getPrimitiveSizeInBits();
       return VecWidth == 128 ? Legal : Illegal;
     }
diff --git a/llvm/test/CodeGen/PowerPC/ldst-with-length-vector.ll b/llvm/test/CodeGen/PowerPC/ldst-with-length-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ldst-with-length-vector.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+define void @partial_st_4i32(<4 x i32>* %ptr, <4 x i32> %value, i32 %len) {
+; CHECK-LABEL: partial_st_4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sldi 4, 7, 58
+; CHECK-NEXT:    stxvl 34, 3, 4
+; CHECK-NEXT:    blr
+  call void @llvm.vp.store.v4i32.p0v4i32.i64(<4 x i32> %value, <4 x i32>* %ptr, <4 x i1> undef, i32 %len)
+  ret void
+}
+declare void @llvm.vp.store.v4i32.p0v4i32.i64(<4 x i32>, <4 x i32>*, <4 x i1>, i32)
+
+define <4 x i32> @partial_ld_4i32(<4 x i32>* %ptr, i32 %len) {
+; CHECK-LABEL: partial_ld_4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sldi 4, 4, 58
+; CHECK-NEXT:    lxvl 34, 3, 4
+; CHECK-NEXT:    blr
+  %res = call <4 x i32> @llvm.vp.load.v4i32.p0v4i32.i64(<4 x i32>* %ptr, <4 x i1> undef, i32 %len)
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.vp.load.v4i32.p0v4i32.i64(<4 x i32>*, <4 x i1>, i32)
+