diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -16366,6 +16366,81 @@ %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef +.. _int_get_active_lane_mask: + +'``llvm.get.active.lane.mask.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %base, i32 %n) + declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %base, i64 %n) + declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %n) + declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %base, i64 %n) + + +Overview: +""""""""" + +Create a mask representing active and inactive vector lanes. + + +Arguments: +"""""""""" + +Both operands have the same scalar integer type. The result is a vector with +the i1 element type. + +Semantics: +"""""""""" + +The '``llvm.get.active.lane.mask.*``' intrinsics are semantically equivalent +to: + +:: + + %m[i] = icmp ule (%base + i), %n + +where ``%m`` is a vector (mask) of active/inactive lanes with its elements +indexed by ``i``, and ``%base``, ``%n`` are the two arguments to +``llvm.get.active.lane.mask.*``, ``%icmp`` is an integer compare and ``ule`` +the unsigned less-than-or-equal comparison operator. Overflow cannot occur in +``(%base + i)`` and its comparison against ``%n`` as it is performed in integer +numbers and not in machine numbers. The above is equivalent to: + +:: + + %m = @llvm.get.active.lane.mask(%base, %n) + +This can, for example, be emitted by the loop vectorizer. Then, ``%base`` is +the first element of the vector induction variable (VIV), and ``%n`` is the +Back-edge Taken Count (BTC). 
Thus, these intrinsics perform an element-wise +less than or equal comparison of VIV with BTC, producing a mask of true/false +values representing active/inactive vector lanes, except if the VIV overflows +in which case they return false in the lanes where the VIV overflows. The +arguments are scalar types to accommodate scalable vector types, for which it is +unknown what the type of the step vector needs to be that enumerates its +lanes without overflow. + +This mask ``%m`` can e.g. be used in masked load/store instructions. These +intrinsics provide a hint to the backend. I.e., for a vector loop, the +back-edge taken count of the original scalar loop is explicit as the second +argument. + + +Examples: +""""""""" + +.. code-block:: llvm + + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %elem0, i64 429) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + + .. _int_mload_mstore: Masked Vector Load and Store Intrinsics diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1294,6 +1294,10 @@ } +def int_get_active_lane_mask: + Intrinsic<[llvm_anyvector_ty], + [llvm_anyint_ty, LLVMMatchType<1>], + [IntrNoMem, IntrNoSync, IntrWillReturn]>; //===-------------------------- Masked Intrinsics -------------------------===// // diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4812,6 +4812,14 @@ "eh.exceptionpointer argument must be a catchpad", Call); break; } + case Intrinsic::get_active_lane_mask: { + Assert(Call.getType()->isVectorTy(), "get_active_lane_mask: must return a " + "vector", Call); + auto *ElemTy = Call.getType()->getScalarType(); + Assert(ElemTy->isIntegerTy(1), "get_active_lane_mask: element type is not " + "i1", Call); + break; + } + case 
Intrinsic::masked_load: { Assert(Call.getType()->isVectorTy(), "masked_load: must return a vector", Call); diff --git a/llvm/test/Verifier/get-active-lane-mask.ll b/llvm/test/Verifier/get-active-lane-mask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Verifier/get-active-lane-mask.ll @@ -0,0 +1,21 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32, i32) + +define <4 x i32> @t1(i32 %IV, i32 %BTC) { +; CHECK: get_active_lane_mask: element type is not i1 +; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC) + + %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC) + ret <4 x i32> %res +} + +declare i32 @llvm.get.active.lane.mask.i32.i32(i32, i32) + +define i32 @t2(i32 %IV, i32 %BTC) { +; CHECK: Intrinsic has incorrect return type! +; CHECK-NEXT: i32 (i32, i32)* @llvm.get.active.lane.mask.i32.i32 + + %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %BTC) + ret i32 %res +}