diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -19604,6 +19604,115 @@
     llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, -2, 3, 2) ==> <B, C, E> ; trailing elements
 
+.. _int_vp_load:
+
+'``llvm.vp.load``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <4 x float> @llvm.vp.load.v4f32.p0v4f32(<4 x float>* %ptr, <4 x i1> %mask, i32 %evl)
+      declare <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %ptr, <vscale x 2 x i1> %mask, i32 %evl)
+      declare <8 x float> @llvm.vp.load.v8f32.p1v8f32(<8 x float> addrspace(1)* %ptr, <8 x i1> %mask, i32 %evl)
+      declare <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p6nxv1i64(<vscale x 1 x i64> addrspace(6)* %ptr, <vscale x 1 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.load.*``' intrinsic is the vector length predicated version of
+the :ref:`llvm.masked.load <int_mload>` intrinsic.
+
+Arguments:
+""""""""""
+
+The first operand is the base pointer for the load. The second operand is a
+vector of boolean values with the same number of elements as the return type.
+The third is the explicit vector length of the operation. The return type and
+underlying type of the base pointer are the same vector types.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.load``' intrinsic reads a vector from memory in the same way as
+the '``llvm.masked.load``' intrinsic, where the mask is taken from the
+combination of the '``mask``' and '``evl``' operands in the usual VP way. Of
+the '``llvm.masked.load``' operands not set by '``llvm.vp.load``': the
+'``passthru``' operand is implicitly ``undef``; the '``alignment``' operand is
+taken as the ABI alignment of the return type as specified by the
+:ref:`datalayout string<langref_datalayout>`.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+    %r = call <8 x i8> @llvm.vp.load.v8i8.p0v8i8(<8 x i8>* %ptr, <8 x i1> %mask, i32 %evl)
+    ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+    ;; Note that since the alignment is ultimately up to the data layout
+    ;; string, 8 (the default) is used as an example.
+
+    %also.r = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %ptr, i32 8, <8 x i1> %mask, <8 x i8> undef)
+
+
+.. _int_vp_store:
+
+'``llvm.vp.store``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare void @llvm.vp.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %mask, i32 %evl)
+      declare void @llvm.vp.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16>* %ptr, <vscale x 2 x i1> %mask, i32 %evl)
+      declare void @llvm.vp.store.v8f32.p1v8f32(<8 x float> %val, <8 x float> addrspace(1)* %ptr, <8 x i1> %mask, i32 %evl)
+      declare void @llvm.vp.store.nxv1i64.p6nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64> addrspace(6)* %ptr, <vscale x 1 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.store.*``' intrinsic is the vector length predicated version of
+the :ref:`llvm.masked.store <int_mstore>` intrinsic.
+
+Arguments:
+""""""""""
+
+The first operand is the vector value to be written to memory. The second
+operand is the base pointer for the store. It has the same underlying type as
+the value operand. The third operand is a vector of boolean values with the
+same number of elements as the value operand. The fourth is the explicit vector
+length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.store``' intrinsic writes a vector to memory in the same way as
+the '``llvm.masked.store``' intrinsic, where the mask is taken from the
+combination of the '``mask``' and '``evl``' operands in the usual VP way. The
+'``alignment``' operand of the '``llvm.masked.store``' intrinsic is not set by
+'``llvm.vp.store``': it is taken as the ABI alignment of the type of the
+'``value``' operand as specified by the :ref:`datalayout
+string<langref_datalayout>`.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+    call void @llvm.vp.store.v8i8.p0v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %mask, i32 %evl)
+    ;; For all lanes below %evl, the call above is lane-wise equivalent to the call below.
+    ;; Note that since the alignment is ultimately up to the data layout
+    ;; string, 8 (the default) is used as an example.
+
+    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %val, <8 x i8>* %ptr, i32 8, <8 x i1> %mask)
+
 
 .. _int_mload_mstore:
 
 Masked Vector Load and Store Intrinsics