diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -19541,6 +19541,64 @@
       %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
 
+.. _int_experimental_vp_splice:
+
+'``llvm.experimental.vp.splice``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %vec1, <2 x double> %vec2, i32 %imm, <2 x i1> %mask, i32 %evl1, i32 %evl2)
+      declare <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, i32 %imm, <vscale x 4 x i1> %mask, i32 %evl1, i32 %evl2)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vp.splice.*``' intrinsic is the vector length
+predicated version of the '``llvm.experimental.vector.splice.*``' intrinsic.
+
+Arguments:
+""""""""""
+
+The result and the first two arguments ``vec1`` and ``vec2`` are vectors with
+the same type. The third argument ``imm`` is an immediate signed integer that
+indicates the offset index. The fourth argument ``mask`` is a vector mask and
+has the same number of elements as the result. The last two arguments ``evl1``
+and ``evl2`` are unsigned integers indicating the explicit vector lengths of
+``vec1`` and ``vec2`` respectively. ``imm``, ``evl1`` and ``evl2`` must
+respect the following constraints: ``-evl1 <= imm < evl1``, ``0 <= evl1 <= VL``
+and ``0 <= evl2 <= VL``, where ``VL`` is the runtime vector factor. If these
+constraints are not satisfied the intrinsic has undefined behaviour.
+
+Semantics:
+""""""""""
+
+Effectively, this intrinsic concatenates ``vec1[0..evl1-1]`` and
+``vec2[0..evl2-1]`` and creates the result vector by selecting the elements in
+a window of size ``evl2``, starting at index ``imm`` (for a positive immediate)
+of the concatenated vector. Elements in the result vector beyond ``evl2`` are
+``undef``. If ``imm`` is negative the starting index is ``evl1 + imm``. The
+result vector of active vector length ``evl2`` contains ``evl1 - imm``
+(``-imm`` for negative ``imm``) elements from indices ``[imm..evl1 - 1]``
+(``[evl1 + imm..evl1 - 1]`` for negative ``imm``) of ``vec1`` followed by the
+first ``evl2 - (evl1 - imm)`` (``evl2 + imm`` for negative ``imm``) elements of
+``vec2``. If ``evl1 - imm`` (``-imm`` for negative ``imm``) is greater than or
+equal to ``evl2``, only the first ``evl2`` elements are considered and the
+remaining are ``undef``. The lanes in the result vector disabled by ``mask``
+are ``undef``.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+ llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, 1, 2, 3)  ==> <B, E, F, undef>     ; index
+ llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, -2, 3, 2) ==> <B, C, undef, undef> ; trailing elements
+
 .. _int_mload_mstore:
 
 Masked Vector Load and Store Intrinsics
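
To cross-check the Semantics wording above, the following scalar model spells out the operation in C++. It is an illustrative sketch only, not code from this patch: the `vpSplice` helper and the use of `std::optional` to stand in for `undef` lanes are invented for the example, and the `main` routine replays the first case from the Examples section.

// Scalar reference model of llvm.experimental.vp.splice (sketch only, not
// part of this patch). An empty optional stands in for an undef lane.
#include <cassert>
#include <optional>
#include <vector>

using Lane = std::optional<int>;

std::vector<Lane> vpSplice(const std::vector<int> &Vec1,
                           const std::vector<int> &Vec2, int Imm,
                           const std::vector<bool> &Mask, int EVL1, int EVL2) {
  int VL = static_cast<int>(Vec1.size());
  // Behaviour is undefined unless -evl1 <= imm < evl1, 0 <= evl1 <= VL and
  // 0 <= evl2 <= VL.
  assert(-EVL1 <= Imm && Imm < EVL1 && 0 <= EVL1 && EVL1 <= VL &&
         0 <= EVL2 && EVL2 <= VL);

  // Concatenate vec1[0..evl1-1] and vec2[0..evl2-1].
  std::vector<int> Concat(Vec1.begin(), Vec1.begin() + EVL1);
  Concat.insert(Concat.end(), Vec2.begin(), Vec2.begin() + EVL2);

  // Copy a window of evl2 elements starting at imm (evl1 + imm for negative
  // imm); lanes past the window, or disabled by the mask, stay undef.
  int Start = Imm >= 0 ? Imm : EVL1 + Imm;
  std::vector<Lane> Result(VL, std::nullopt);
  for (int I = 0; I < EVL2 && Start + I < static_cast<int>(Concat.size()); ++I)
    if (Mask[I])
      Result[I] = Concat[Start + I];
  return Result;
}

// Replays the first LangRef example with 1..8 standing in for A..H:
// splice(<1,2,3,4>, <5,6,7,8>, imm=1, evl1=2, evl2=3) ==> <2, 5, 6, undef>.
int main() {
  std::vector<Lane> R = vpSplice({1, 2, 3, 4}, {5, 6, 7, 8}, /*Imm=*/1,
                                 {true, true, true, true}, /*EVL1=*/2,
                                 /*EVL2=*/3);
  assert(R[0] == 2 && R[1] == 5 && R[2] == 6 && !R[3].has_value());
  return 0;
}
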
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1580,6 +1580,15 @@
                              [llvm_anyint_ty, LLVMMatchType<1>],
                              [IntrNoMem, IntrNoSync, IntrWillReturn]>;
 
+def int_experimental_vp_splice:
+  DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                        [LLVMMatchType<0>,
+                         LLVMMatchType<0>,
+                         llvm_i32_ty,
+                         LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                         llvm_i32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_load:
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -356,6 +356,10 @@
 // END_REGISTER_CASES(vp_select, VP_SELECT)
 END_REGISTER_VP_INTRINSIC(vp_select)
 
+BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5,
+                  EXPERIMENTAL_VP_SPLICE, -1)
+END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
+
 ///// } Shuffles
 
 #undef BEGIN_REGISTER_VP
diff --git a/llvm/test/Verifier/vp-intrinsics.ll b/llvm/test/Verifier/vp-intrinsics.ll
--- a/llvm/test/Verifier/vp-intrinsics.ll
+++ b/llvm/test/Verifier/vp-intrinsics.ll
@@ -47,6 +47,16 @@
   ret void
 }
 
+define void @test_vp_splice0(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %l0, i32 %l1) {
+  %r0 = call <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32> %i0, <8 x i32> %i1, i32 2, <8 x i1> %m, i32 %l0, i32 %l1)
+  ret void
+}
+
+define void @test_vp_splice1(<vscale x 8 x i32> %i0, <vscale x 8 x i32> %i1, <vscale x 8 x i1> %m, i32 %l0, i32 %l1) {
+  %r0 = call <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32> %i0, <vscale x 8 x i32> %i1, i32 -1, <vscale x 8 x i1> %m, i32 %l0, i32 %l1)
+  ret void
+}
+
 ; integer arith
 declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
 declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
@@ -82,3 +92,6 @@
 declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32)
 declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32)
 declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32)
+; shuffles
+declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32>, <8 x i32>, i32, <8 x i1>, i32, i32)
+declare <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, i32, i32)
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -70,6 +70,9 @@
   Str << " declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x "
          "i32>, i32)";
 
+  Str << " declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x "
+         "i32>, <8 x i32>, i32, <8 x i1>, i32, i32) ";
+
   return parseAssemblyString(Str.str(), Err, C);
 }
};
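
For completeness, here is one way a transform could construct a call to the new intrinsic once the TableGen entry above is in tree. This is a hedged sketch, not part of the patch: `emitVPSplice` is a hypothetical helper, while `Intrinsic::getDeclaration`, `IRBuilder::CreateCall` and the `Intrinsic::experimental_vp_splice` enum (generated from the .td definition) are standard machinery.

// Hypothetical helper (not part of this patch) showing how a pass might
// emit llvm.experimental.vp.splice through the usual IRBuilder machinery.
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static Value *emitVPSplice(IRBuilder<> &Builder, Module &M, Value *Vec1,
                           Value *Vec2, int64_t Imm, Value *Mask, Value *EVL1,
                           Value *EVL2) {
  // The intrinsic is overloaded on a single type, the vector operand type.
  Function *Splice = Intrinsic::getDeclaration(
      &M, Intrinsic::experimental_vp_splice, {Vec1->getType()});
  // imm carries the ImmArg attribute, so it must be an i32 constant.
  Value *ImmV = ConstantInt::get(Builder.getInt32Ty(), Imm, /*IsSigned=*/true);
  return Builder.CreateCall(Splice, {Vec1, Vec2, ImmV, Mask, EVL1, EVL2});
}

Making the offset an ImmArg rather than a plain operand mirrors the fixed-length `llvm.experimental.vector.splice` intrinsic and gives targets a compile-time constant they can fold into slide-style instructions during lowering.
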