diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -17634,6 +17634,76 @@ ``TargetTransformInfo::hasActiveVectorLength()`` returns true when the target has native support for %evl. +.. _int_vp_select: + +'``llvm.vp.select.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <16 x i32> @llvm.vp.select.v16i32 (<16 x i1> <mask>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <pivot>) + declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64 (<vscale x 4 x i1> <mask>, <vscale x 4 x i64> <on_true>, <vscale x 4 x i64> <on_false>, i32 <pivot>) + +Overview: +""""""""" + +The '``llvm.vp.select``' intrinsic is used to choose one value based on a +condition, without IR-level branching. + +Arguments: +"""""""""" + +The first operand is a vector of ``i1`` and indicates the mask. The second +operand is the value that is selected when the condition is true. The third +operand is the value that is selected when the condition is false. The fourth +operand is the pivot. + +#. The optional ``fast-math flags`` marker indicates that the select has one or more + :ref:`fast-math flags <fastmath>`. These are optimization hints to enable + otherwise unsafe floating-point optimizations. Fast-math flags are only valid + for selects that return a floating-point scalar or vector type, or an array + (nested to any depth) of floating-point scalar or vector types. + +Semantics: +"""""""""" + +The intrinsic selects lanes from the second and third operand depending on a +condition. The pivot creates a mask, ``%pivotMask``, with all elements ``0 <= i < +%pivot`` set to ``1`` and all others set to ``0``. + +:: + + M = %mask AND %pivotMask + +For each lane ``i``, if ``M[i]`` is 1, the intrinsic returns lane ``i`` of +the second operand; otherwise, it returns lane ``i`` of the third +operand. + +If the condition is a vector of i1, then the value arguments must be +vectors of the same size, and the selection is done element by element. 
+ +Lanes at or above ``%pivot`` are always taken from the third operand, +regardless of the corresponding bit in ``%mask``. + +Note that the intrinsic does not have an ``%evl`` parameter. + +Example: +"""""""" + +.. code-block:: llvm + + %r = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %mask, <4 x i32> %on_true, <4 x i32> %on_false, i32 %pivot) + ;; %r is lane-wise equivalent to %also.r, where %pivot.splat is %pivot broadcast to all lanes + + %pivotMask = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %pivot.splat + %M = and <4 x i1> %mask, %pivotMask + %also.r = select <4 x i1> %M, <4 x i32> %on_true, <4 x i32> %on_false + + .. _int_vp_add: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1497,6 +1497,12 @@ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; } +// Shuffles. +def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; def int_get_active_lane_mask: DefaultAttrsIntrinsic<[llvm_anyvector_ty], diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -231,6 +231,15 @@ ///// } Memory Operations +///// Shuffles { + +// llvm.vp.select(mask,on_true,on_false,pivot) +BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, None) +// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4) +// END_REGISTER_CASES(vp_select, VP_SELECT) +END_REGISTER_VP_INTRINSIC(vp_select) + +///// } Shuffles #undef BEGIN_REGISTER_VP #undef BEGIN_REGISTER_VP_INTRINSIC diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -476,6 +476,11 @@ default: VPFunc = Intrinsic::getDeclaration(M, VPID, Params[0]->getType()); break; + case Intrinsic::vp_select: + VPFunc = Intrinsic::getDeclaration( + M, VPID, + 
{Params[1]->getType()}); + break; case Intrinsic::vp_load: VPFunc = Intrinsic::getDeclaration( M, VPID, diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -46,10 +46,11 @@ Str << " declare <8 x float> @llvm.vp." << BinaryFPOpcode << ".v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) "; - Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) "; - Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, <8 x i1>, i32) "; - Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32) "; - Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32) "; + Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) " + << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, <8 x i1>, i32) " + << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32) " + << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32) " + << " declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)"; return parseAssemblyString(Str.str(), Err, C); }