diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -20052,6 +20052,61 @@ call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 1, <8 x i1> %mask) +.. _int_vp_fptosi: + +'``llvm.vp.fptosi.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <16 x i32> @llvm.vp.fptosi.v16i32.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>) + declare <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>) + declare <256 x i64> @llvm.vp.fptosi.v256i64.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>) + +Overview: +""""""""" + +The '``llvm.vp.fptosi``' intrinsic converts the :ref:`floating-point +<t_floating>` operand to the signed integer return type. +The operation has a mask and an explicit vector length parameter. + + +Arguments: +"""""""""" + +The '``llvm.vp.fptosi``' intrinsic takes a value to cast as its first operand. +The value to cast must be a vector of :ref:`floating-point <t_floating>` type. +The return type is the type to cast the value to. The return type must be +vector of :ref:`integer <t_integer>` type. The second operand is the vector +mask. The return type, the value to cast, and the vector mask have the same +number of elements. The third operand is the explicit vector length of the +operation. + +Semantics: +"""""""""" + +The '``llvm.vp.fptosi``' intrinsic converts its :ref:`floating-point +<t_floating>` operand into the nearest (rounding towards zero) signed integer +value where the lane position is below the explicit vector length and the +vector mask is true. Masked-off lanes are undefined. On enabled lanes where +conversion takes place and the value cannot fit in the return type, the result +on that lane is a :ref:`poison value <poisonvalues>`. + +Examples: +""""""""" + +.. 
code-block:: llvm + + %r = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r + + %t = fptosi <4 x float> %a to <4 x i32> + %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef + .. _int_mload_mstore: diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -472,6 +472,21 @@ /// @} }; +class VPCastIntrinsic : public VPIntrinsic { +public: + static bool isVPCast(Intrinsic::ID ID); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// @{ + static bool classof(const IntrinsicInst *I) { + return VPCastIntrinsic::isVPCast(I->getIntrinsicID()); + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + /// @} +}; + /// This is the common base class for constrained floating point intrinsics. class ConstrainedFPIntrinsic : public IntrinsicInst { public: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1523,6 +1523,13 @@ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; } + +// Casts. +def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + // Shuffles. def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -121,6 +121,11 @@ #define VP_PROPERTY_BINARYOP #endif +// A property to infer VP type casts automatically. 
+#ifndef VP_PROPERTY_CASTOP +#define VP_PROPERTY_CASTOP +#endif + /// } Property Macros ///// Integer Arithmetic { @@ -223,6 +228,27 @@ ///// } Floating-Point Arithmetic +///// Type Casts { +// Specialized helper macro for type conversions. +// (%x, %mask, %evl). +#ifdef HELPER_REGISTER_CAST_VP +#error \ + "The internal helper macro HELPER_REGISTER_CAST_VP is already defined!" +#endif +#define HELPER_REGISTER_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND) \ + BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \ + VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \ + VP_PROPERTY_CASTOP \ + END_REGISTER_VP(vp_##OPSUFFIX, VPSD) + +// llvm.vp.fptosi(x,mask,vlen) +HELPER_REGISTER_CAST_VP(fptosi, VP_FPTOSI, FPToSI, 0) + +#undef HELPER_REGISTER_CAST_VP + +///// } Type Casts + ///// Memory Operations { // llvm.vp.store(val,ptr,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3) @@ -375,6 +401,7 @@ #undef END_REGISTER_VP_INTRINSIC #undef END_REGISTER_VP_SDNODE #undef VP_PROPERTY_BINARYOP +#undef VP_PROPERTY_CASTOP #undef VP_PROPERTY_CONSTRAINEDFP #undef VP_PROPERTY_FUNCTIONAL_INTRINSIC #undef VP_PROPERTY_FUNCTIONAL_OPC diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -492,6 +492,10 @@ VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy); break; } + case Intrinsic::vp_fptosi: + VPFunc = + Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[0]->getType()}); + break; case Intrinsic::vp_merge: case Intrinsic::vp_select: VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()}); @@ -529,6 +533,18 @@ return false; } +bool VPCastIntrinsic::isVPCast(Intrinsic::ID ID) { + switch (ID) { + default: + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) 
case Intrinsic::VPID: +#define VP_PROPERTY_CASTOP return true; +#define END_REGISTER_VP_INTRINSIC(VPID) break; +#include "llvm/IR/VPIntrinsics.def" + } + return false; +} + unsigned VPReductionIntrinsic::getVectorParamPos() const { return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID()); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -521,6 +521,7 @@ void visitUserOp2(Instruction &I) { visitUserOp1(I); } void visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call); void visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI); + void visitVPIntrinsic(VPIntrinsic &VPI); void visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII); void visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI); void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI); @@ -4808,6 +4809,10 @@ "unsupported rounding mode argument", Call); break; } +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#include "llvm/IR/VPIntrinsics.def" + visitVPIntrinsic(cast<VPIntrinsic>(Call)); + break; #define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" @@ -5513,6 +5518,17 @@ return nullptr; } +void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) { + if (auto *VPCast = dyn_cast<VPCastIntrinsic>(&VPI)) { + auto *RetTy = cast<VectorType>(VPCast->getType()); + auto *ValTy = cast<VectorType>(VPCast->getOperand(0)->getType()); + Assert(RetTy->getElementCount() == ValTy->getElementCount(), + "VP cast intrinsic first argument and result vector lengths must be " + "equal", + *VPCast); + } +} + void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { unsigned NumOperands; bool HasRoundingMD; diff --git a/llvm/test/Verifier/invalid-vp-intrinsics.ll b/llvm/test/Verifier/invalid-vp-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Verifier/invalid-vp-intrinsics.ll @@ -0,0 +1,11 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +declare <4 x i32> 
@llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32); + +; CHECK: VP cast intrinsic first argument and result vector lengths must be equal +; CHECK-NEXT: %r0 = call <4 x i32> + +define void @test_vp_fptosi(<8 x float> %src, <4 x i1> %m, i32 %n) { + %r0 = call <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float> %src, <4 x i1> %m, i32 %n) + ret void +} diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -80,6 +80,9 @@ Str << " declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x " "i32>, <8 x i32>, i32, <8 x i1>, i32, i32) "; + Str << " declare <8 x i32> @llvm.vp.fptosi.v8i32" + << ".v8f32(<8 x float>, <8 x i1>, i32) "; + return parseAssemblyString(Str.str(), Err, C); } };