Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -18576,6 +18576,64 @@
      %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
 
+.. _int_experimental_vp_splice:
+
+'``llvm.experimental.vp.splice``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %vec1, <2 x double> %vec2, i32 %imm, <2 x i1> %mask, i32 %evl1, i32 %evl2)
+      declare <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, i32 %imm, <vscale x 4 x i1> %mask, i32 %evl1, i32 %evl2)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vp.splice.*``' intrinsic is the vector length
+predicated version of the '``llvm.experimental.vector.splice.*``' intrinsic.
+
+Arguments:
+""""""""""
+
+The result and the first two arguments ``vec1`` and ``vec2`` are vectors with
+the same type. The third argument ``imm`` is a signed integer that indicates
+the offset index. The fourth argument ``mask`` is a vector mask and has the
+same number of elements as the result. The last two arguments ``evl1`` and
+``evl2`` are unsigned integers indicating the explicit vector lengths of
+``vec1`` and ``vec2`` respectively. ``imm``, ``evl1`` and ``evl2`` should
+respect the following constraints: ``-evl1 <= imm < evl1``, ``0 <= evl1 <= VL``
+and ``0 <= evl2 <= VL``, where ``VL`` is the runtime vector factor. If these
+constraints are not satisfied the intrinsic has undefined behaviour.
+
+Semantics:
+""""""""""
+
+Effectively, this intrinsic concatenates ``vec1[0..evl1-1]`` and
+``vec2[0..evl2-1]`` and creates the result vector by selecting the elements in a
+window of size ``evl2``, starting at index ``imm`` (for a positive immediate) of
+the concatenated vector. Elements in the result vector beyond ``evl2`` are
+undef.
+If ``imm`` is negative the starting index is ``evl1 + imm``. The result
+vector of active vector length ``evl2`` contains ``evl1 - imm`` (``-imm`` for
+negative ``imm``) elements from indices ``[imm..evl1 - 1]``
+(``[evl1 + imm..evl1 -1]`` for negative ``imm``) of ``vec1`` followed by the
+first ``evl2 - (evl1 - imm)`` (``evl2 + imm`` for negative ``imm``) elements of
+``vec2``. If ``evl1 - imm`` (``-imm``) >= ``evl2``, only the first ``evl2``
+elements are considered and the remaining are undef. The lanes in the result
+vector disabled by ``mask`` are undef.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+ llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, 1, 2, 3)  ==> <B, E, F, undef> ; index
+ llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, -2, 3, 2) ==> <B, C, undef, undef> ; trailing elements
+
+
 .. _int_mload_mstore:
 
 Masked Vector Load and Store Intrinsics
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1503,6 +1503,15 @@
                             [llvm_anyint_ty, LLVMMatchType<1>],
                             [IntrNoMem, IntrNoSync, IntrWillReturn]>;
 
+def int_experimental_vp_splice:
+  DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                        [LLVMMatchType<0>,
+                         LLVMMatchType<0>,
+                         llvm_i32_ty,
+                         LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                         llvm_i32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_load:
Index: llvm/include/llvm/IR/VPIntrinsics.def
===================================================================
--- llvm/include/llvm/IR/VPIntrinsics.def
+++ llvm/include/llvm/IR/VPIntrinsics.def
@@ -231,6 +231,10 @@
 
 ///// } Memory Operations
 
+///// Shuffles
+BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5,
+                  EXPERIMENTAL_VP_SPLICE, -1)
+END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
 
 #undef BEGIN_REGISTER_VP
 #undef BEGIN_REGISTER_VP_INTRINSIC
Index: llvm/test/Verifier/vp-intrinsics.ll
===================================================================
--- llvm/test/Verifier/vp-intrinsics.ll
+++ llvm/test/Verifier/vp-intrinsics.ll
@@ -29,6 +29,16 @@
 
 ; TODO: test_vp_constrained_fp
 
+define void @test_vp_splice0(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %l0, i32 %l1) {
+  %r0 = call <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32> %i0, <8 x i32> %i1, i32 2, <8 x i1> %m, i32 %l0, i32 %l1)
+  ret void
+}
+
+define void @test_vp_splice1(<vscale x 8 x i32> %i0, <vscale x 8 x i32> %i1, <vscale x 8 x i1> %m, i32 %l0, i32 %l1) {
+  %r0 = call <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32> %i0, <vscale x 8 x i32> %i1, i32 -1, <vscale x 8 x i1> %m, i32 %l0, i32 %l1)
+  ret void
+}
+
 ; integer arith
 declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
 declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
@@ -50,3 +60,6 @@
 declare <8 x double> @llvm.vp.fmul.v8f64(<8 x double>, <8 x double>, <8 x i1>, i32)
 declare <8 x double> @llvm.vp.fdiv.v8f64(<8 x double>, <8 x double>, <8 x i1>, i32)
 declare <8 x double> @llvm.vp.frem.v8f64(<8 x double>, <8 x double>, <8 x i1>, i32)
+; shuffles
+declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32>, <8 x i32>, i32, <8 x i1>, i32, i32)
+declare <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, i32, i32)
Index: llvm/unittests/IR/VPIntrinsicTest.cpp
===================================================================
--- llvm/unittests/IR/VPIntrinsicTest.cpp
+++ llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -46,10 +46,17 @@
   Str << " declare <8 x float> @llvm.vp."
<< BinaryFPOpcode << ".v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) "; - Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) "; - Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, <8 x i1>, i32) "; - Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32) "; - Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32) "; + Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, " + "<8 x i1>, i32) "; + Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x " + "i32*>, <8 x i1>, i32) "; + Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x " + "i1>, i32) "; + Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x " + "i1>, i32) "; + + Str << " declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x " + "i32>, <8 x i32>, i32, <8 x i1>, i32, i32) "; return parseAssemblyString(Str.str(), Err, C); }