Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -16158,6 +16158,68 @@ %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef +.. _int_get_active_lane_mask: + +'``llvm.get.active.lane.mask.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %VIV[0], i32 %BTC) + declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %VIV[0], i64 %BTC) + declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %VIV[0], i64 %BTC) + + +Overview: +""""""""" + +Create a mask representing active and inactive vector lanes. + + +Arguments: +"""""""""" + +Both operands have the same scalar integer type. The first operand is the first +element of the Vector Induction Variable (VIV), denoted by ``%VIV[0]``. The second +operand is the scalar loop Back-edge Taken Count (BTC). The result is a vector +with the same number of elements as the VIV, but with the i1 element value +type. + +The arguments are scalar types to accommodate scalable vector types, for which +it is unknown what the type of the step vector needs to be that enumerates +its lanes without overflow. + + +Semantics: +"""""""""" + +The '``llvm.get.active.lane.mask.*``' intrinsics are semantically equivalent to +``icmp ule %VIV, (splat %BTC)``, where VIV is the Vector Induction Variable, and +BTC the back-edge taken count splat into a vector, except when the vector +induction variable overflows, in which case they return false in the lanes +where the VIV overflows. Thus, these intrinsics perform an element-wise less +than or equal comparison of VIV with BTC, producing a mask of true/false values +representing active/inactive vector lanes. This mask can e.g. be used in the +masked load/store instructions. These intrinsics provide a hint to the +backend. 
I.e., for a vector loop, the back-edge taken count of the original +scalar loop is explicit as the second argument. + + +Examples: +""""""""" + +.. code-block:: llvm + + %induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3> + %elem0 = extractelement <4 x i64> %induction, i32 0 + %get.active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %elem0, i64 429) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %get.active.lane.mask, <4 x i32> undef) + + .. _int_mload_mstore: Masked Vector Load and Store Intrinsics Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -1235,6 +1235,10 @@ } +def int_get_active_lane_mask: + Intrinsic<[llvm_anyvector_ty], + [llvm_anyint_ty, LLVMMatchType<1>], + [IntrNoMem, IntrNoSync, IntrWillReturn]>; //===-------------------------- Masked Intrinsics -------------------------===// //