diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -19912,6 +19912,126 @@
      call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %val, <8 x i8>* %ptr, i32 4, <8 x i1> %mask)
 
+.. _int_experimental_vp_strided_load:
+
+'``llvm.experimental.vp.strided.load``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <4 x float> @llvm.experimental.vp.strided.load.v4f32.i64(float* %ptr, i64 %stride, <4 x i1> %mask, i32 %evl)
+      declare <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.i64(i16* %ptr, i64 %stride, <vscale x 2 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vp.strided.load``' intrinsic loads, into a vector,
+scalar values from memory locations evenly spaced apart by '``stride``' bytes,
+starting from '``ptr``'.
+
+Arguments:
+""""""""""
+
+The first operand is the base pointer for the load. The second operand is the
+stride value expressed in bytes. The third operand is a vector of boolean
+values with the same number of elements as the return type. The fourth is the
+explicit vector length of the operation. The underlying type of the base
+pointer matches the type of the scalar elements of the return operand.
+
+The :ref:`align <attr_align>` parameter attribute can be provided for the
+first operand.
+
+Semantics:
+""""""""""
+
+The '``llvm.experimental.vp.strided.load``' intrinsic loads, into a vector,
+multiple scalar values from memory in the same way as the
+:ref:`llvm.vp.gather <int_vp_gather>` intrinsic, where the vector of pointers
+is in the form:
+
+   ``%ptrs = <%ptr, %ptr + %stride, %ptr + 2 * %stride, ... >``,
+
+with '``ptr``' previously cast to a pointer to '``i8``', '``stride``' always
+interpreted as a signed integer and all arithmetic occurring in the pointer
+type.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+     %r = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.i64(i64* %ptr, i64 %stride, <8 x i1> %mask, i32 %evl)
+     ;; The operation can also be expressed like this:
+
+     %addr = bitcast i64* %ptr to i8*
+     ;; Create a vector of pointers %addrs in the form:
+     ;; %addrs = <%addr, %addr + %stride, %addr + 2 * %stride, ...>
+     %ptrs = bitcast <8 x i8*> %addrs to <8 x i64*>
+     %also.r = call <8 x i64> @llvm.vp.gather.v8i64.v8p0i64(<8 x i64*> %ptrs, <8 x i1> %mask, i32 %evl)
+
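+As an illustration only (the pointer ``%mat``, the constant stride, and the
+all-ones mask below are assumptions made for this sketch, not part of the
+specification): for a row-major ``i32`` matrix with 100 columns, a stride of
+``100 * 4 = 400`` bytes loads the first element of 8 consecutive rows, that
+is, one column slice, in a single operation:
+
+.. code-block:: text
+
+     ;; Hypothetical example: %mat points at a row-major i32 matrix with
+     ;; 100 columns, so consecutive rows are 400 bytes apart.
+     %col = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.i64(i32* %mat, i64 400, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+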
+.. _int_experimental_vp_strided_store:
+
+'``llvm.experimental.vp.strided.store``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare void @llvm.experimental.vp.strided.store.v4f32.i64(<4 x float> %val, float* %ptr, i64 %stride, <4 x i1> %mask, i32 %evl)
+      declare void @llvm.experimental.vp.strided.store.nxv2i16.i64(<vscale x 2 x i16> %val, i16* %ptr, i64 %stride, <vscale x 2 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``@llvm.experimental.vp.strided.store``' intrinsic stores the elements of
+'``val``' into memory locations evenly spaced apart by '``stride``' bytes,
+starting from '``ptr``'.
+
+Arguments:
+""""""""""
+
+The first operand is the vector value to be written to memory. The second
+operand is the base pointer for the store. Its underlying type matches the
+scalar element type of the value operand. The third operand is the stride
+value expressed in bytes. The fourth operand is a vector of boolean values
+with the same number of elements as the value operand. The fifth is the
+explicit vector length of the operation.
+
+The :ref:`align <attr_align>` parameter attribute can be provided for the
+second operand.
+
+Semantics:
+""""""""""
+
+The '``llvm.experimental.vp.strided.store``' intrinsic stores the elements of
+'``val``' in the same way as the :ref:`llvm.vp.scatter <int_vp_scatter>`
+intrinsic, where the vector of pointers is in the form:
+
+   ``%ptrs = <%ptr, %ptr + %stride, %ptr + 2 * %stride, ... >``,
+
+with '``ptr``' previously cast to a pointer to '``i8``', '``stride``' always
+interpreted as a signed integer and all arithmetic occurring in the pointer
+type.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+     call void @llvm.experimental.vp.strided.store.v8i64.i64(<8 x i64> %val, i64* %ptr, i64 %stride, <8 x i1> %mask, i32 %evl)
+     ;; The operation can also be expressed like this:
+
+     %addr = bitcast i64* %ptr to i8*
+     ;; Create a vector of pointers %addrs in the form:
+     ;; %addrs = <%addr, %addr + %stride, %addr + 2 * %stride, ...>
+     %ptrs = bitcast <8 x i8*> %addrs to <8 x i64*>
+     call void @llvm.vp.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, <8 x i1> %mask, i32 %evl)
+
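+As a further illustration (again, ``%buf``, the constants, and the all-ones
+mask are assumptions for this sketch): a stride of twice the element size,
+here ``16`` bytes for ``i64`` elements, stores the four elements of ``%val``
+to every other ``i64`` slot of a buffer:
+
+.. code-block:: text
+
+     ;; Hypothetical example: write %val to slots 0, 2, 4 and 6 of %buf.
+     call void @llvm.experimental.vp.strided.store.v4i64.i64(<4 x i64> %val, i64* %buf, i64 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
+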
 .. _int_vp_gather:
 
 '``llvm.vp.gather``' Intrinsic
 
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1364,6 +1364,77 @@
   SDValue getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue Base,
                             SDValue Offset, ISD::MemIndexedMode AM);
 
+  SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                           EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+                           SDValue Offset, SDValue Stride, SDValue Mask,
+                           SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+                           Align Alignment, MachineMemOperand::Flags MMOFlags,
+                           const AAMDNodes &AAInfo,
+                           const MDNode *Ranges = nullptr,
+                           bool IsExpanding = false);
+  inline SDValue getStridedLoadVP(
+      ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+      SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+      SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+      MaybeAlign Alignment = MaybeAlign(),
+      MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+      const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr,
+      bool IsExpanding = false) {
+    // Ensures that codegen never sees a None Alignment.
+    return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride,
+                            Mask, EVL, PtrInfo, MemVT,
+                            Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags,
+                            AAInfo, Ranges, IsExpanding);
+  }
+  SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                           EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+                           SDValue Offset, SDValue Stride, SDValue Mask,
+                           SDValue EVL, EVT MemVT, MachineMemOperand *MMO,
+                           bool IsExpanding = false);
+  SDValue getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+                           SDValue Stride, SDValue Mask, SDValue EVL,
+                           MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+                           MachineMemOperand::Flags MMOFlags,
+                           const AAMDNodes &AAInfo,
+                           const MDNode *Ranges = nullptr,
+                           bool IsExpanding = false);
+  SDValue getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+                           SDValue Stride, SDValue Mask, SDValue EVL,
+                           MachineMemOperand *MMO, bool IsExpanding = false);
+  SDValue
+  getExtStridedLoadVP(ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT,
+                      SDValue Chain, SDValue Ptr, SDValue Stride, SDValue Mask,
+                      SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+                      MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags,
+                      const AAMDNodes &AAInfo, bool IsExpanding = false);
+  SDValue getExtStridedLoadVP(ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT,
+                              SDValue Chain, SDValue Ptr, SDValue Stride,
+                              SDValue Mask, SDValue EVL, EVT MemVT,
+                              MachineMemOperand *MMO, bool IsExpanding = false);
+  SDValue getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL,
+                                  SDValue Base, SDValue Offset,
+                                  ISD::MemIndexedMode AM);
+  SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val,
+                            SDValue Ptr, SDValue Offset, SDValue Stride,
+                            SDValue Mask, SDValue EVL, EVT MemVT,
+                            MachineMemOperand *MMO, ISD::MemIndexedMode AM,
+                            bool IsTruncating = false,
+                            bool IsCompressing = false);
+  SDValue getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val,
+                                 SDValue Ptr, SDValue Stride, SDValue Mask,
+                                 SDValue EVL, MachinePointerInfo PtrInfo,
+                                 EVT SVT, Align Alignment,
+                                 MachineMemOperand::Flags MMOFlags,
+                                 const AAMDNodes &AAInfo,
+                                 bool IsCompressing = false);
+  SDValue getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val,
+                                 SDValue Ptr, SDValue Stride, SDValue Mask,
+                                 SDValue EVL, EVT SVT, MachineMemOperand *MMO,
+                                 bool IsCompressing = false);
+  SDValue getIndexedStridedStoreVP(SDValue OrigStore, const SDLoc &DL,
+                                   SDValue Base, SDValue Offset,
+                                   ISD::MemIndexedMode AM);
+
   SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
                       ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
                       ISD::MemIndexType IndexType);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -508,7 +508,7 @@
   class LSBaseSDNodeBitfields {
     friend class LSBaseSDNode;
-    friend class VPLoadStoreSDNode;
+    friend class VPBaseLoadStoreSDNode;
     friend class MaskedLoadStoreSDNode;
     friend class MaskedGatherScatterSDNode;
     friend class VPGatherScatterSDNode;
@@ -529,6 +529,7 @@
   class LoadSDNodeBitfields {
     friend class LoadSDNode;
     friend class VPLoadSDNode;
+    friend class VPStridedLoadSDNode;
     friend class MaskedLoadSDNode;
     friend class MaskedGatherSDNode;
     friend class VPGatherSDNode;
@@ -542,6 +543,7 @@
   class StoreSDNodeBitfields {
     friend class StoreSDNode;
     friend class VPStoreSDNode;
+    friend class VPStridedStoreSDNode;
     friend class MaskedStoreSDNode;
     friend class MaskedScatterSDNode;
     friend class VPScatterSDNode;
@@ -1365,6 +1367,7 @@
     case ISD::VP_STORE:
     case ISD::MSTORE:
     case ISD::VP_SCATTER:
+    case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
       return getOperand(2);
     case ISD::MGATHER:
     case ISD::MSCATTER:
@@ -1408,6 +1411,8 @@
     case ISD::VP_STORE:
     case ISD::VP_GATHER:
     case ISD::VP_SCATTER:
+    case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+    case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
       return true;
     default:
       return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
@@ -2354,34 +2359,64 @@
   }
 };
 
-/// This base class is used to represent VP_LOAD and VP_STORE nodes
-class VPLoadStoreSDNode : public MemSDNode {
+/// This base class is used to represent VP_LOAD, VP_STORE,
+/// EXPERIMENTAL_VP_STRIDED_LOAD and EXPERIMENTAL_VP_STRIDED_STORE nodes
+class VPBaseLoadStoreSDNode : public MemSDNode {
 public:
   friend class SelectionDAG;
 
-  VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
-                    SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
-                    MachineMemOperand *MMO)
-      : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+  VPBaseLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
+                        const DebugLoc &DL, SDVTList VTs,
+                        ISD::MemIndexedMode AM, EVT MemVT,
+                        MachineMemOperand *MMO)
+      : MemSDNode(NodeTy, Order, DL, VTs, MemVT, MMO) {
     LSBaseSDNodeBits.AddressingMode = AM;
     assert(getAddressingMode() == AM && "Value truncated");
   }
 
-  // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
-  // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
+  // VPStridedStoreSDNode (Chain, Data, Ptr, Offset, Stride, Mask, EVL)
+  // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
+  // VPStridedLoadSDNode (Chain, Ptr, Offset, Stride, Mask, EVL)
+  // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
   // Mask is a vector of i1 elements;
   // the type of EVL is TLI.getVPExplicitVectorLengthTy().
   const SDValue &getOffset() const {
-    return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3);
+    return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
+                       getOpcode() == ISD::VP_LOAD)
+                          ? 2
+                          : 3);
   }
   const SDValue &getBasePtr() const {
-    return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2);
+    return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
+                       getOpcode() == ISD::VP_LOAD)
+                          ? 1
+                          : 2);
   }
   const SDValue &getMask() const {
-    return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4);
+    switch (getOpcode()) {
+    default:
+      llvm_unreachable("Invalid opcode");
+    case ISD::VP_LOAD:
+      return getOperand(3);
+    case ISD::VP_STORE:
+    case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+      return getOperand(4);
+    case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+      return getOperand(5);
+    }
   }
   const SDValue &getVectorLength() const {
-    return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5);
+    switch (getOpcode()) {
+    default:
+      llvm_unreachable("Invalid opcode");
+    case ISD::VP_LOAD:
+      return getOperand(4);
+    case ISD::VP_STORE:
+    case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+      return getOperand(5);
+    case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+      return getOperand(6);
+    }
   }
 
   /// Return the addressing mode for this load or store:
@@ -2397,19 +2432,21 @@
   bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
 
   static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
+    return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
+           N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE ||
+           N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
   }
 };
 
 /// This class is used to represent a VP_LOAD node
-class VPLoadSDNode : public VPLoadStoreSDNode {
+class VPLoadSDNode : public VPBaseLoadStoreSDNode {
 public:
   friend class SelectionDAG;
 
   VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
                EVT MemVT, MachineMemOperand *MMO)
-      : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
+      : VPBaseLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
     LoadSDNodeBits.ExtTy = ETy;
     LoadSDNodeBits.IsExpanding = isExpanding;
   }
@@ -2429,15 +2466,45 @@
   bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
 };
 
+/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node.
+class VPStridedLoadSDNode : public VPBaseLoadStoreSDNode {
+public:
+  friend class SelectionDAG;
+
+  VPStridedLoadSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
+                      ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
+                      bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
+      : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, Order, DL,
+                              VTs, AM, MemVT, MMO) {
+    LoadSDNodeBits.ExtTy = ETy;
+    LoadSDNodeBits.IsExpanding = IsExpanding;
+  }
+
+  ISD::LoadExtType getExtensionType() const {
+    return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
+  }
+
+  const SDValue &getBasePtr() const { return getOperand(1); }
+  const SDValue &getOffset() const { return getOperand(2); }
+  const SDValue &getStride() const { return getOperand(3); }
+  const SDValue &getMask() const { return getOperand(4); }
+  const SDValue &getVectorLength() const { return getOperand(5); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD;
+  }
+  bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
+};
+
 /// This class is used to represent a VP_STORE node
-class VPStoreSDNode : public VPLoadStoreSDNode {
+class VPStoreSDNode : public VPBaseLoadStoreSDNode {
 public:
   friend class SelectionDAG;
 
   VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
                 EVT MemVT, MachineMemOperand *MMO)
-      : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
+      : VPBaseLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
     StoreSDNodeBits.IsTruncating = isTrunc;
     StoreSDNodeBits.IsCompressing = isCompressing;
   }
@@ -2464,6 +2531,43 @@
   }
 };
 
+/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node.
+class VPStridedStoreSDNode : public VPBaseLoadStoreSDNode {
+public:
+  friend class SelectionDAG;
+
+  VPStridedStoreSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
+                       ISD::MemIndexedMode AM, bool IsTrunc, bool IsCompressing,
+                       EVT MemVT, MachineMemOperand *MMO)
+      : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_STORE, Order, DL,
+                              VTs, AM, MemVT, MMO) {
+    StoreSDNodeBits.IsTruncating = IsTrunc;
+    StoreSDNodeBits.IsCompressing = IsCompressing;
+  }
+
+  /// Return true if this is a truncating store.
+  /// For integers this is the same as doing a TRUNCATE and storing the result.
+  /// For floats, it is the same as doing an FP_ROUND and storing the result.
+  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
+
+  /// Returns true if the op does a compression to the vector before storing.
+  /// The node contiguously stores the active elements (integers or floats)
+  /// in src (those with their respective bit set in writemask k) to unaligned
+  /// memory at base_addr.
+  bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
+
+  const SDValue &getValue() const { return getOperand(1); }
+  const SDValue &getBasePtr() const { return getOperand(2); }
+  const SDValue &getOffset() const { return getOperand(3); }
+  const SDValue &getStride() const { return getOperand(4); }
+  const SDValue &getMask() const { return getOperand(5); }
+  const SDValue &getVectorLength() const { return getOperand(6); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE;
+  }
+};
+
 /// This base class is used to represent MLOAD and MSTORE nodes
 class MaskedLoadStoreSDNode : public MemSDNode {
 public:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1409,6 +1409,22 @@
                                llvm_i32_ty],
                              [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>;
 // TODO allow IntrNoCapture for vectors of pointers
 
+// Experimental strided memory accesses
+def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[],
+                             [ llvm_anyvector_ty,
+                               LLVMPointerToElt<0>,
+                               llvm_anyint_ty, // Stride in bytes
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty],
+                             [ NoCapture<ArgIndex<1>>, IntrNoSync, IntrWriteMem, IntrArgMemOnly, IntrWillReturn ]>;
+
+def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                             [ LLVMPointerToElt<0>,
+                               llvm_anyint_ty, // Stride in bytes
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty],
+                             [ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+
 // Speculatable Binary operators
 let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
   def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -262,6 +262,13 @@
 VP_PROPERTY_MEMOP(1, 0)
 END_REGISTER_VP(vp_store, VP_STORE)
 
+// llvm.experimental.vp.strided.store(val,ptr,stride,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_store, 3, 4)
+// chain = EXPERIMENTAL_VP_STRIDED_STORE chain,val,base,offset,stride,mask,evl
+BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_STRIDED_STORE, 0, experimental_vp_strided_store, 5, 6)
+VP_PROPERTY_MEMOP(1, 0)
+END_REGISTER_VP(experimental_vp_strided_store, EXPERIMENTAL_VP_STRIDED_STORE)
+
 // llvm.vp.scatter(ptr,val,mask,vlen)
 BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
 // chain = VP_SCATTER chain,val,base,indices,scale,mask,evl
@@ -279,6 +286,13 @@
 VP_PROPERTY_MEMOP(0, None)
 END_REGISTER_VP(vp_load, VP_LOAD)
 
+// llvm.experimental.vp.strided.load(ptr,stride,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3)
+// val,chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl
+BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_STRIDED_LOAD, -1, experimental_vp_strided_load, 4, 5)
+VP_PROPERTY_MEMOP(0, None)
+END_REGISTER_VP(experimental_vp_strided_load, EXPERIMENTAL_VP_STRIDED_LOAD)
+
 // llvm.vp.gather(ptr,mask,vlen)
 BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2)
 // val,chain = VP_GATHER chain,base,indices,scale,mask,evl
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1174,6 +1174,11 @@
         Node->getOpcode(),
         cast<VPScatterSDNode>(Node)->getValue().getValueType());
     break;
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+    Action = TLI.getOperationAction(
+        Node->getOpcode(),
+        cast<VPStridedStoreSDNode>(Node)->getValue().getValueType());
+    break;
   case ISD::VECREDUCE_FADD:
   case ISD::VECREDUCE_FMUL:
   case ISD::VECREDUCE_ADD:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -741,6 +741,20 @@
     ID.AddInteger(EST->getMemOperand()->getFlags());
     break;
   }
+  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: {
+    const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N);
+    ID.AddInteger(SLD->getMemoryVT().getRawBits());
+    ID.AddInteger(SLD->getRawSubclassData());
+    ID.AddInteger(SLD->getPointerInfo().getAddrSpace());
+    break;
+  }
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE: {
+    const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+    ID.AddInteger(SST->getMemoryVT().getRawBits());
+    ID.AddInteger(SST->getRawSubclassData());
+    ID.AddInteger(SST->getPointerInfo().getAddrSpace());
+    break;
+  }
   case ISD::VP_GATHER: {
     const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
     ID.AddInteger(EG->getMemoryVT().getRawBits());
@@ -8085,6 +8099,259 @@
   return V;
 }
 
+SDValue SelectionDAG::getStridedLoadVP(
+    ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+    SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+    SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+    MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+    const MDNode *Ranges, bool IsExpanding) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+  MMOFlags |= MachineMemOperand::MOLoad;
+  assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+  // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+  // clients.
+  if (PtrInfo.V.isNull())
+    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+  uint64_t Size = MemoryLocation::UnknownSize;
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+                                                   Alignment, AAInfo, Ranges);
+  return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride,
+                          Mask, EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+    ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+    SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+    SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) {
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+  SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL};
+  SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+                         : getVTList(VT, MVT::Other);
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>(
+      DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+    cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+
+  auto *N = newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+                                           VTs, AM, ExtType, IsExpanding,
+                                           MemVT, MMO);
+  createOperands(N, Ops);
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+    EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride,
+    SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+    MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+    const MDNode *Ranges, bool IsExpanding) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+                          Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment,
+                          MMOFlags, AAInfo, Ranges, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain,
+                                       SDValue Ptr, SDValue Stride,
+                                       SDValue Mask, SDValue EVL,
+                                       MachineMemOperand *MMO,
+                                       bool IsExpanding) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+                          Undef, Stride, Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+    ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+    SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL,
+    MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment,
+    MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+    bool IsExpanding) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+                          Stride, Mask, EVL, PtrInfo, MemVT, Alignment,
+                          MMOFlags, AAInfo, nullptr, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+    ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+    SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT,
+    MachineMemOperand *MMO, bool IsExpanding) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+                          Stride, Mask, EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL,
+                                              SDValue Base, SDValue Offset,
+                                              ISD::MemIndexedMode AM) {
+  auto *SLD = cast<VPStridedLoadSDNode>(OrigLoad);
+  assert(SLD->getOffset().isUndef() &&
+         "Strided load is already an indexed load!");
+  // Don't propagate the invariant or dereferenceable flags.
+  auto MMOFlags =
+      SLD->getMemOperand()->getFlags() &
+      ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+  return getStridedLoadVP(
+      AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL,
+      SLD->getChain(), Base, Offset, SLD->getStride(), SLD->getMask(),
+      SLD->getVectorLength(), SLD->getPointerInfo(), SLD->getMemoryVT(),
+      SLD->getAlign(), MMOFlags, SLD->getAAInfo(), nullptr,
+      SLD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL,
+                                        SDValue Val, SDValue Ptr,
+                                        SDValue Offset, SDValue Stride,
+                                        SDValue Mask, SDValue EVL, EVT MemVT,
+                                        MachineMemOperand *MMO,
+                                        ISD::MemIndexedMode AM,
+                                        bool IsTruncating, bool IsCompressing) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
+  SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
+                         : getVTList(MVT::Other);
+  SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL};
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+      DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+    cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+                                            VTs, AM, IsTruncating,
+                                            IsCompressing, MemVT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(
+    SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride,
+    SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT,
+    Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+    bool IsCompressing) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+  MMOFlags |= MachineMemOperand::MOStore;
+  assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+  if (PtrInfo.V.isNull())
+    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo);
+  return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT,
+                                MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL,
+                                             SDValue Val, SDValue Ptr,
+                                             SDValue Stride, SDValue Mask,
+                                             SDValue EVL, EVT SVT,
+                                             MachineMemOperand *MMO,
+                                             bool IsCompressing) {
+  EVT VT = Val.getValueType();
+
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  if (VT == SVT)
+    return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()),
+                             Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED,
+                             /*IsTruncating*/ false, IsCompressing);
+
+  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+         "Should only be a truncating store, not extending!");
+  assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+  assert(VT.isVector() == SVT.isVector() &&
+         "Cannot use trunc store to convert to or from a vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+         "Cannot use trunc store to change the number of vector elements!");
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL};
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+  ID.AddInteger(SVT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+      DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+    cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+                                            VTs, ISD::UNINDEXED, true,
+                                            IsCompressing, SVT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore,
+                                               const SDLoc &DL, SDValue Base,
+                                               SDValue Offset,
+                                               ISD::MemIndexedMode AM) {
+  auto *SST = cast<VPStridedStoreSDNode>(OrigStore);
+  assert(SST->getOffset().isUndef() &&
+         "Strided store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDValue Ops[] = {
+      SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(),
+      SST->getMask(),  SST->getVectorLength()};
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+  ID.AddInteger(SST->getMemoryVT().getRawBits());
+  ID.AddInteger(SST->getRawSubclassData());
+  ID.AddInteger(SST->getPointerInfo().getAddrSpace());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+    return SDValue(E, 0);
+
+  auto *N = newSDNode<VPStridedStoreSDNode>(
+      DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(),
+      SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand());
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
 SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
                                   ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
                                   ISD::MemIndexType IndexType) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -570,6 +570,10 @@
                          SmallVector<SDValue, 7> &OpValues, bool IsGather);
   void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
                            SmallVector<SDValue, 7> &OpValues, bool IsScatter);
+  void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
+                          SmallVectorImpl<SDValue> &OpValues);
+  void visitVPStridedStore(const VPIntrinsic &VPIntrin,
+                           SmallVectorImpl<SDValue> &OpValues);
   void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
 
   void visitVAStart(const CallInst &I);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7453,6 +7453,54 @@
   setValue(&VPIntrin, ST);
 }
 
+void SelectionDAGBuilder::visitVPStridedLoad(
+    const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
+  SDLoc DL = getCurSDLoc();
+  Value *PtrOperand = VPIntrin.getArgOperand(0);
+  MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+  if (!Alignment)
+    Alignment = DAG.getEVTAlign(VT.getScalarType());
+  AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+  const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+  MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+  bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+      MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+
+  SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
+                                    OpValues[2], OpValues[3], MMO,
+                                    false /*IsExpanding*/);
+
+  if (AddToChain)
+    PendingLoads.push_back(LD.getValue(1));
+  setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStridedStore(
+    const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
+  SDLoc DL = getCurSDLoc();
+  Value *PtrOperand = VPIntrin.getArgOperand(1);
+  EVT VT = OpValues[0].getValueType();
+  MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+  if (!Alignment)
+    Alignment = DAG.getEVTAlign(VT.getScalarType());
+  AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+      MemoryLocation::UnknownSize, *Alignment, AAInfo);
+
+  SDValue ST = DAG.getStridedStoreVP(
+      getMemoryRoot(), DL, OpValues[0], OpValues[1],
+      DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3],
+      OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false,
+      /*IsCompressing*/ false);
+
+  DAG.setRoot(ST);
+  setValue(&VPIntrin, ST);
+}
+
 void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
     const VPIntrinsic &VPIntrin) {
   SDLoc DL = getCurSDLoc();
@@ -7490,10 +7538,16 @@
     visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
                       Opcode == ISD::VP_GATHER);
     break;
+  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+    visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
+    break;
   case ISD::VP_STORE:
   case ISD::VP_SCATTER:
     visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
     break;
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+    visitVPStridedStore(VPIntrin, OpValues);
+    break;
   }
 }
 
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -505,6 +505,10 @@
     VPFunc = Intrinsic::getDeclaration(
         M, VPID, {ReturnType, Params[0]->getType()});
     break;
+  case Intrinsic::experimental_vp_strided_load:
+    VPFunc =
+        Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[1]->getType()});
+    break;
   case Intrinsic::vp_gather:
     VPFunc = Intrinsic::getDeclaration(
         M, VPID, {ReturnType, Params[0]->getType()});
@@ -513,6 +517,10 @@
     VPFunc = Intrinsic::getDeclaration(
         M, VPID, {Params[0]->getType(), Params[1]->getType()});
     break;
+  case Intrinsic::experimental_vp_strided_store:
+    VPFunc = Intrinsic::getDeclaration(
+        M, VPID, {Params[0]->getType(), Params[2]->getType()});
+    break;
   case Intrinsic::vp_scatter:
     VPFunc = Intrinsic::getDeclaration(
         M, VPID, {Params[0]->getType(), Params[1]->getType()});
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -58,10 +58,16 @@
   Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, "
          "<8 x i1>, i32) ";
+  Str << "declare void "
+         "@llvm.experimental.vp.strided.store.v8i32.i32(<8 x i32>, "
+         "i32*, i32, <8 x i1>, i32) ";
   Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x "
          "i32*>, <8 x i1>, i32) ";
   Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x "
          "i1>, i32) ";
+  Str << "declare <8 x i32> "
+         "@llvm.experimental.vp.strided.load.v8i32.i32(i32*, i32, <8 "
+         "x i1>, i32) ";
   Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x "
          "i1>, i32) ";