diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -17698,6 +17698,98 @@
 ``TargetTransformInfo::hasActiveVectorLength()`` returns true when the target
 has native support for %evl.
 
+.. _int_vp_select:
+
+'``llvm.vp.select.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.select.v16i32 (<16 x i1> <condition>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <evl>, i1 immarg <tail_passthru>)
+      declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64 (<vscale x 4 x i1> <condition>, <vscale x 4 x i64> <on_true>, <vscale x 4 x i64> <on_false>, i32 <evl>, i1 immarg <tail_passthru>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.select``' intrinsic is used to choose one value based on a
+condition, without IR-level branching.
+
+Arguments:
+""""""""""
+
+The first operand is a vector of ``i1`` and indicates the mask. The second
+operand is the value that is selected when the condition is true. The third
+operand is the value that is selected when the condition is false.
+The vectors must be of the same size.
+The fourth operand is the explicit vector length.
+The fifth operand is an ``i1`` immediate. The immediate controls whether lanes
+of the third operand are passed through at positions greater than or equal to
+the explicit vector length.
+
+#. The optional ``fast-math flags`` marker indicates that the select has one or more
+   :ref:`fast-math flags <fastmath>`. These are optimization hints to enable
+   otherwise unsafe floating-point optimizations. Fast-math flags are only valid
+   for selects that return a floating-point scalar or vector type, or an array
+   (nested to any depth) of floating-point scalar or vector types.
+
+Semantics:
+""""""""""
+
+The intrinsic selects lanes from the second and third operand depending on a
+condition.
+
+There are two selection behaviors and the ``tail_passthru`` immediate identifies which behavior applies.
+
+If ``tail_passthru == 1`` the intrinsic behaves as follows.
+The explicit vector length creates a mask, ``%pivotMask``, with all elements
+``0 <= i < %evl`` set to ``1`` and all others set to ``0``. This mask is
+combined with the condition operand:
+
+::
+
+      M = %mask AND %pivotMask
+
+For each lane, if the element of ``M`` is ``1``, the instruction returns the
+lane of the first value argument; otherwise, it returns the lane of the
+second value argument.
+
+
+If ``tail_passthru == 0`` the intrinsic behaves as follows.
+For all lanes at positions below ``%evl``, the mask parameter is the condition.
+For each lane, if the mask element is ``1``, the instruction returns the lane
+of the first value argument; otherwise, it returns the lane of the second
+value argument.
+All result lanes at positions greater than or equal to ``%evl`` are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      ;;;; vp.select with tail passthru.
+
+      %r = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %m, <4 x i32> %on_true, <4 x i32> %on_false, i32 %evl, i1 immarg 1)
+
+      ;;; Expansion.
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r.
+      ;; Lanes at and above %evl are passed through from %on_false.
+      ;; %evl.splat holds %evl broadcast to every lane.
+      %evlMask = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %evl.splat
+      %M = and <4 x i1> %m, %evlMask
+      %also.r = select <4 x i1> %M, <4 x i32> %on_true, <4 x i32> %on_false
+
+
+      ;;;; vp.select without tail passthru.
+
+      %r = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %m, <4 x i32> %on_true, <4 x i32> %on_false, i32 %evl, i1 immarg 0)
+
+      ;;; Expansion.
+      ;; Any result is legal on lanes at and above %evl.
+      %also.r = select <4 x i1> %m, <4 x i32> %on_true, <4 x i32> %on_false
+
+
 .. _int_vp_add:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1497,6 +1497,14 @@
                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                llvm_i32_ty]>;
 }
+// Shuffles.
+def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+             [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+               LLVMMatchType<0>,
+               LLVMMatchType<0>,
+               llvm_i32_ty,
+               llvm_i1_ty],
+             [ImmArg<ArgIndex<4>>]>;
 
 def int_get_active_lane_mask: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -231,6 +231,15 @@
 ///// } Memory Operations
 
+///// Shuffles {
+
+// llvm.vp.select(mask,on_true,on_false,evl,tail_passthru)
+BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3)
+// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4)
+// END_REGISTER_CASES(vp_select, VP_SELECT)
+END_REGISTER_VP_INTRINSIC(vp_select)
+
+///// } Shuffles
 
 #undef BEGIN_REGISTER_VP
 #undef BEGIN_REGISTER_VP_INTRINSIC
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -476,6 +476,11 @@
   default:
     VPFunc = Intrinsic::getDeclaration(M, VPID, Params[0]->getType());
     break;
+  case Intrinsic::vp_select:
+    VPFunc = Intrinsic::getDeclaration(
+        M, VPID,
+        {Params[1]->getType()});
+    break;
   case Intrinsic::vp_load:
     VPFunc = Intrinsic::getDeclaration(
         M, VPID,
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -54,7 +54,8 @@
          "i1>, i32) ";
   Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x "
          "i1>, i32) ";
-
+  Str << " declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x "
+         "i32>, i32, i1) ";
   return parseAssemblyString(Str.str(), Err, C);
 }
 };