diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -20176,6 +20176,332 @@
      call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 1, <8 x i1> %mask)
 
+
+.. _int_vp_trunc:
+
+'``llvm.vp.trunc.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i16>  @llvm.vp.trunc.v16i16.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i16>  @llvm.vp.trunc.nxv4i16.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.trunc``' intrinsic truncates its first operand to the return
+type. The operation has a mask and an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.trunc``' intrinsic takes a value to cast as its first operand.
+The return type is the type to cast the value to. Both types must be vectors of
+:ref:`integer <t_integer>` type. The bit size of the value must be larger than
+the bit size of the return type. The second operand is the vector mask. The
+return type, the value to cast, and the vector mask have the same number of
+elements. The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.trunc``' intrinsic truncates the high order bits in value and
+converts the remaining bits to the return type. Since the source size must be
+larger than the destination size, '``llvm.vp.trunc``' cannot be a *no-op cast*.
+It will always truncate bits. The conversion is performed on lane positions
+below the explicit vector length and where the vector mask is true. Masked-off
+lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i16> @llvm.vp.trunc.v4i16.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = trunc <4 x i32> %a to <4 x i16>
+      %also.r = select <4 x i1> %mask, <4 x i16> %t, <4 x i16> undef
+
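[Reviewer note, not part of the patch: a minimal usage sketch of the scalable
form documented above. The function name ``@narrow`` is illustrative; the
declaration follows the Syntax block.]

.. code-block:: llvm

    declare <vscale x 4 x i16> @llvm.vp.trunc.nxv4i16.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

    ; Narrow i32 lanes to i16 under a mask and an explicit vector length.
    define <vscale x 4 x i16> @narrow(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl) {
      %r = call <vscale x 4 x i16> @llvm.vp.trunc.nxv4i16.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl)
      ret <vscale x 4 x i16> %r
    }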
+
+.. _int_vp_zext:
+
+'``llvm.vp.zext.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.zext.v16i32.v16i16 (<16 x i16> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32>  @llvm.vp.zext.nxv4i32.nxv4i16 (<vscale x 4 x i16> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.zext``' intrinsic zero extends its first operand to the return
+type. The operation has a mask and an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.zext``' intrinsic takes a value to cast as its first operand.
+The return type is the type to cast the value to. Both types must be vectors of
+:ref:`integer <t_integer>` type. The bit size of the value must be smaller than
+the bit size of the return type. The second operand is the vector mask. The
+return type, the value to cast, and the vector mask have the same number of
+elements. The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.zext``' intrinsic fills the high order bits of the value with
+zero bits until it reaches the size of the return type. When zero extending
+from i1, the result will always be either 0 or 1. The conversion is performed
+on lane positions below the explicit vector length and where the vector mask
+is true. Masked-off lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.zext.v4i32.v4i16(<4 x i16> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = zext <4 x i16> %a to <4 x i32>
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+.. _int_vp_sext:
+
+'``llvm.vp.sext.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.sext.v16i32.v16i16 (<16 x i16> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32>  @llvm.vp.sext.nxv4i32.nxv4i16 (<vscale x 4 x i16> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.sext``' intrinsic sign extends its first operand to the return
+type. The operation has a mask and an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.sext``' intrinsic takes a value to cast as its first operand.
+The return type is the type to cast the value to. Both types must be vectors of
+:ref:`integer <t_integer>` type. The bit size of the value must be smaller than
+the bit size of the return type. The second operand is the vector mask. The
+return type, the value to cast, and the vector mask have the same number of
+elements. The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.sext``' intrinsic performs a sign extension by copying the sign
+bit (highest order bit) of the value until it reaches the size of the return
+type. When sign extending from i1, the result will always be either -1 or 0.
+The conversion is performed on lane positions below the explicit vector length
+and where the vector mask is true. Masked-off lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.sext.v4i32.v4i16(<4 x i16> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = sext <4 x i16> %a to <4 x i32>
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
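[Reviewer note, not part of the patch: a sketch contrasting the two extensions
above for an i1 source, per the "0 or 1" vs. "-1 or 0" semantics. The i1
overloads are assumed to follow the same mangling scheme as the declarations
in the Syntax blocks.]

.. code-block:: llvm

    declare <4 x i32> @llvm.vp.zext.v4i32.v4i1(<4 x i1>, <4 x i1>, i32)
    declare <4 x i32> @llvm.vp.sext.v4i32.v4i1(<4 x i1>, <4 x i1>, i32)

    define void @ext_from_i1(<4 x i1> %b, <4 x i1> %m, i32 %evl) {
      ; enabled true lanes become 1
      %z = call <4 x i32> @llvm.vp.zext.v4i32.v4i1(<4 x i1> %b, <4 x i1> %m, i32 %evl)
      ; enabled true lanes become -1 (all bits set)
      %s = call <4 x i32> @llvm.vp.sext.v4i32.v4i1(<4 x i1> %b, <4 x i1> %m, i32 %evl)
      ret void
    }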
+
+.. _int_vp_fptrunc:
+
+'``llvm.vp.fptrunc.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x float>  @llvm.vp.fptrunc.v16f32.v16f64 (<16 x double> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x float>  @llvm.vp.fptrunc.nxv4f32.nxv4f64 (<vscale x 4 x double> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.fptrunc``' intrinsic truncates its first operand to the return
+type. The operation has a mask and an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.fptrunc``' intrinsic takes a value to cast as its first operand.
+The return type is the type to cast the value to. Both types must be vectors of
+:ref:`floating-point <t_floating>` type. The bit size of the value must be
+larger than the bit size of the return type. This implies that
+'``llvm.vp.fptrunc``' cannot be used to make a *no-op cast*. The second operand
+is the vector mask. The return type, the value to cast, and the vector mask
+have the same number of elements. The third operand is the explicit vector
+length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.fptrunc``' intrinsic casts a ``value`` from a larger
+:ref:`floating-point <t_floating>` type to a smaller :ref:`floating-point
+<t_floating>` type.
+This intrinsic is assumed to execute in the default :ref:`floating-point
+environment <floatenv>`. The conversion is performed on lane positions below
+the explicit vector length and where the vector mask is true. Masked-off lanes
+are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = fptrunc <4 x double> %a to <4 x float>
+      %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
+
+.. _int_vp_fpext:
+
+'``llvm.vp.fpext.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x double>  @llvm.vp.fpext.v16f64.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x double>  @llvm.vp.fpext.nxv4f64.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.fpext``' intrinsic extends its first operand to the return
+type. The operation has a mask and an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.fpext``' intrinsic takes a value to cast as its first operand.
+The return type is the type to cast the value to. Both types must be vectors of
+:ref:`floating-point <t_floating>` type. The bit size of the value must be
+smaller than the bit size of the return type. This implies that
+'``llvm.vp.fpext``' cannot be used to make a *no-op cast*. The second operand
+is the vector mask. The return type, the value to cast, and the vector mask
+have the same number of elements. The third operand is the explicit vector
+length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.fpext``' intrinsic extends the ``value`` from a smaller
+:ref:`floating-point <t_floating>` type to a larger :ref:`floating-point
+<t_floating>` type. '``llvm.vp.fpext``' cannot be used to make a
+*no-op cast* because it always changes bits. Use ``bitcast`` to make a
+*no-op cast* for a floating-point cast.
+The conversion is performed on lane positions below the explicit vector length
+and where the vector mask is true. Masked-off lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = fpext <4 x float> %a to <4 x double>
+      %also.r = select <4 x i1> %mask, <4 x double> %t, <4 x double> undef
+
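[Reviewer note, not part of the patch: a sketch combining the two conversions
above in the usual widen-compute-narrow pattern. ``llvm.vp.fmul`` is an
existing VP binary intrinsic; the function name ``@square_in_double`` is
illustrative.]

.. code-block:: llvm

    declare <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float>, <4 x i1>, i32)
    declare <4 x double> @llvm.vp.fmul.v4f64(<4 x double>, <4 x double>, <4 x i1>, i32)
    declare <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double>, <4 x i1>, i32)

    define <4 x float> @square_in_double(<4 x float> %x, <4 x i1> %m, i32 %evl) {
      ; widening is exact ...
      %w = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl)
      %sq = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %w, <4 x double> %w, <4 x i1> %m, i32 %evl)
      ; ... narrowing rounds in the default floating-point environment
      %r = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> %sq, <4 x i1> %m, i32 %evl)
      ret <4 x float> %r
    }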
+
+.. _int_vp_fptoui:
+
+'``llvm.vp.fptoui.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.fptoui.v16i32.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32>  @llvm.vp.fptoui.nxv4i32.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64>  @llvm.vp.fptoui.v256i64.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.fptoui``' intrinsic converts the :ref:`floating-point
+<t_floating>` operand to the unsigned integer return type.
+The operation has a mask and an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.fptoui``' intrinsic takes a value to cast as its first operand.
+The value to cast must be a vector of :ref:`floating-point <t_floating>` type.
+The return type is the type to cast the value to. The return type must be a
+vector of :ref:`integer <t_integer>` type. The second operand is the vector
+mask. The return type, the value to cast, and the vector mask have the same
+number of elements. The third operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.fptoui``' intrinsic converts its :ref:`floating-point
+<t_floating>` operand into the nearest (rounding towards zero) unsigned integer
+value where the lane position is below the explicit vector length and the
+vector mask is true. Masked-off lanes are undefined. On enabled lanes where
+conversion takes place and the value cannot fit in the return type, the result
+on that lane is a :ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = fptoui <4 x float> %a to <4 x i32>
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
 
 .. _int_vp_fptosi:
 
 '``llvm.vp.fptosi.*``' Intrinsics
@@ -20231,6 +20557,63 @@
 
       %t = fptosi <4 x float> %a to <4 x i32>
       %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+.. _int_vp_uitofp:
+
+'``llvm.vp.uitofp.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x float>  @llvm.vp.uitofp.v16f32.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x float>  @llvm.vp.uitofp.nxv4f32.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x double>  @llvm.vp.uitofp.v256f64.v256i64 (<256 x i64> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.uitofp``' intrinsic converts its unsigned integer operand to the
+:ref:`floating-point <t_floating>` return type. The operation has a mask and
+an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.uitofp``' intrinsic takes a value to cast as its first operand.
+The value to cast must be a vector of :ref:`integer <t_integer>` type. The
+return type is the type to cast the value to. The return type must be a vector
+of :ref:`floating-point <t_floating>` type. The second operand is the vector
+mask. The return type, the value to cast, and the vector mask have the same
+number of elements. The third operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.uitofp``' intrinsic interprets its first operand as an unsigned
+integer quantity and converts it to the corresponding floating-point value. If
+the value cannot be exactly represented, it is rounded using the default
+rounding mode. The conversion is performed on lane positions below the
+explicit vector length and where the vector mask is true. Masked-off lanes are
+undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = uitofp <4 x i32> %a to <4 x float>
+      %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
+
 .. _int_vp_sitofp:
 
 '``llvm.vp.sitofp.*``' Intrinsics
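[Reviewer note, not part of the patch: a sketch of how the unsigned conversion
above differs from ``llvm.vp.sitofp`` on the same bit pattern; lane count and
values are illustrative.]

.. code-block:: llvm

    declare <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32>, <4 x i1>, i32)
    declare <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32>, <4 x i1>, i32)

    define void @itofp(<4 x i1> %m, i32 %evl) {
      ; lanes holding 0xFFFFFFFF convert to roughly 4.29e9 (rounded to float) here ...
      %u = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i1> %m, i32 %evl)
      ; ... and to exactly -1.0 here
      %s = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i1> %m, i32 %evl)
      ret void
    }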
@@ -20287,6 +20670,118 @@
       %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
 
+
+.. _int_vp_ptrtoint:
+
+'``llvm.vp.ptrtoint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i8>  @llvm.vp.ptrtoint.v16i8.v16p0i32 (<16 x i32*> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i8>  @llvm.vp.ptrtoint.nxv4i8.nxv4p0i32 (<vscale x 4 x i32*> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64>  @llvm.vp.ptrtoint.v256i64.v256p0i32 (<256 x i32*> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.ptrtoint``' intrinsic converts its pointer operand to the
+integer return type. The operation has a mask and an explicit vector length
+parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.ptrtoint``' intrinsic takes a value to cast as its first
+operand, which must be a vector of pointers. The return type, to which the
+value is cast, must be a vector of :ref:`integer <t_integer>` type.
+The second operand is the vector mask. The return type, the value to cast, and
+the vector mask have the same number of elements.
+The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.ptrtoint``' intrinsic converts ``value`` to the return type by
+interpreting the pointer value as an integer and either truncating or zero
+extending that value to the size of the integer type.
+If ``value`` is smaller than the return type, then a zero extension is done. If
+``value`` is larger than the return type, then a truncation is done. If they
+are the same size, then nothing is done (*no-op cast*) other than a type
+change.
+The conversion is performed on lane positions below the explicit vector length
+and where the vector mask is true. Masked-off lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i8> @llvm.vp.ptrtoint.v4i8.v4p0i32(<4 x i32*> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = ptrtoint <4 x i32*> %a to <4 x i8>
+      %also.r = select <4 x i1> %mask, <4 x i8> %t, <4 x i8> undef
+
+
+.. _int_vp_inttoptr:
+
+'``llvm.vp.inttoptr.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32*>  @llvm.vp.inttoptr.v16p0i32.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32*>  @llvm.vp.inttoptr.nxv4p0i32.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i32*>  @llvm.vp.inttoptr.v256p0i32.v256i32 (<256 x i32> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.inttoptr``' intrinsic converts its integer value to the pointer
+return type. The operation has a mask and an explicit vector length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.inttoptr``' intrinsic takes a value to cast as its first
+operand, which must be a vector of :ref:`integer <t_integer>` type. The return
+type, to which the value is cast, must be a vector of pointers.
+The second operand is the vector mask. The return type, the value to cast, and
+the vector mask have the same number of elements.
+The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.inttoptr``' intrinsic converts ``value`` to the return type by
+applying either a zero extension or a truncation depending on the size of the
+integer ``value``. If ``value`` is larger than the size of a pointer, then a
+truncation is done. If ``value`` is smaller than the size of a pointer, then a
+zero extension is done. If they are the same size, nothing is done (*no-op
+cast*).
+The conversion is performed on lane positions below the explicit vector length
+and where the vector mask is true. Masked-off lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32*> @llvm.vp.inttoptr.v4p0i32.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = inttoptr <4 x i32> %a to <4 x i32*>
+      %also.r = select <4 x i1> %mask, <4 x i32*> %t, <4 x i32*> undef
+
+
 .. _int_vp_fcmp:
 
 '``llvm.vp.fcmp.*``' Intrinsics
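[Reviewer note, not part of the patch: a sketch pairing the two pointer
conversions just documented. It assumes a target with 64-bit pointers, so
both directions are size-preserving for enabled lanes.]

.. code-block:: llvm

    declare <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0i32(<4 x i32*>, <4 x i1>, i32)
    declare <4 x i32*> @llvm.vp.inttoptr.v4p0i32.v4i64(<4 x i64>, <4 x i1>, i32)

    define <4 x i32*> @roundtrip(<4 x i32*> %p, <4 x i1> %m, i32 %evl) {
      %i = call <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0i32(<4 x i32*> %p, <4 x i1> %m, i32 %evl)
      ; on enabled lanes %q equals %p; masked-off lanes are undefined
      %q = call <4 x i32*> @llvm.vp.inttoptr.v4p0i32.v4i64(<4 x i64> %i, <4 x i1> %m, i32 %evl)
      ret <4 x i32*> %q
    }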
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1545,14 +1545,50 @@
 }
 
 // Casts.
+def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                         [ llvm_anyvector_ty,
+                                           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                           llvm_i32_ty]>;
+def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                        [ llvm_anyvector_ty,
+                                          LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                          llvm_i32_ty]>;
+def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                        [ llvm_anyvector_ty,
+                                          LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                          llvm_i32_ty]>;
+def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                           [ llvm_anyvector_ty,
+                                             LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                             llvm_i32_ty]>;
+def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                         [ llvm_anyvector_ty,
+                                           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                           llvm_i32_ty]>;
+def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                          [ llvm_anyvector_ty,
+                                            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                            llvm_i32_ty]>;
 def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
                                           [ llvm_anyvector_ty,
                                             LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                             llvm_i32_ty]>;
+def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                          [ llvm_anyvector_ty,
+                                            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                            llvm_i32_ty]>;
 def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
                                           [ llvm_anyvector_ty,
                                             LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                             llvm_i32_ty]>;
+def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                            [ llvm_anyvector_ty,
+                                              LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                              llvm_i32_ty]>;
+def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                            [ llvm_anyvector_ty,
+                                              LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                              llvm_i32_ty]>;
 
 // Shuffles.
 def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -238,24 +238,65 @@
 ///// Type Casts {
 // Specialized helper macro for type conversions.
 // (%x, %mask, %evl).
-#ifdef HELPER_REGISTER_CAST_VP
+#ifdef HELPER_REGISTER_FP_CAST_VP
 #error \
-    "The internal helper macro HELPER_REGISTER_CAST_VP is already defined!"
+    "The internal helper macro HELPER_REGISTER_FP_CAST_VP is already defined!"
 #endif
-#define HELPER_REGISTER_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND)              \
+#define HELPER_REGISTER_FP_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND)           \
   BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1)                            \
   VP_PROPERTY_FUNCTIONAL_OPC(IROPC)                                           \
-  VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \
-  VP_PROPERTY_CASTOP                                                          \
+  VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \
+  VP_PROPERTY_CASTOP                                                          \
   END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
 
+// llvm.vp.fptoui(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(fptoui, VP_FPTOUI, FPToUI, 0)
+
 // llvm.vp.fptosi(x,mask,vlen)
-HELPER_REGISTER_CAST_VP(fptosi, VP_FPTOSI, FPToSI, 0)
+HELPER_REGISTER_FP_CAST_VP(fptosi, VP_FPTOSI, FPToSI, 0)
+
+// llvm.vp.uitofp(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(uitofp, VP_UITOFP, UIToFP, 1)
 
 // llvm.vp.sitofp(x,mask,vlen)
-HELPER_REGISTER_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1)
+HELPER_REGISTER_FP_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1)
+
+// llvm.vp.fptrunc(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(fptrunc, VP_FPTRUNC, FPTrunc, 1)
+
+// llvm.vp.fpext(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(fpext, VP_FPEXT, FPExt, 0)
+
+#undef HELPER_REGISTER_FP_CAST_VP
+
+// Specialized helper macro for integer type conversions.
+// (%x, %mask, %evl).
+#ifdef HELPER_REGISTER_INT_CAST_VP
+#error \
+    "The internal helper macro HELPER_REGISTER_INT_CAST_VP is already defined!"
+#endif
+#define HELPER_REGISTER_INT_CAST_VP(OPSUFFIX, VPSD, IROPC)                    \
+  BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1)                            \
+  VP_PROPERTY_FUNCTIONAL_OPC(IROPC)                                           \
+  VP_PROPERTY_CASTOP                                                          \
+  END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
+
+// llvm.vp.trunc(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(trunc, VP_TRUNC, Trunc)
+
+// llvm.vp.zext(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(zext, VP_ZEXT, ZExt)
+
+// llvm.vp.sext(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(sext, VP_SEXT, SExt)
+
+// llvm.vp.ptrtoint(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(ptrtoint, VP_PTRTOINT, PtrToInt)
+
+// llvm.vp.inttoptr(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(inttoptr, VP_INTTOPTR, IntToPtr)
 
-#undef HELPER_REGISTER_CAST_VP
+#undef HELPER_REGISTER_INT_CAST_VP
 
 ///// } Type Casts
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -501,8 +501,17 @@
     VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
     break;
   }
+  case Intrinsic::vp_trunc:
+  case Intrinsic::vp_sext:
+  case Intrinsic::vp_zext:
+  case Intrinsic::vp_fptoui:
   case Intrinsic::vp_fptosi:
+  case Intrinsic::vp_uitofp:
   case Intrinsic::vp_sitofp:
+  case Intrinsic::vp_fptrunc:
+  case Intrinsic::vp_fpext:
+  case Intrinsic::vp_ptrtoint:
+  case Intrinsic::vp_inttoptr:
     VPFunc =
         Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[0]->getType()});
     break;
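[Reviewer note, not part of the patch: overloading on ``{ReturnType,
Params[0]->getType()}`` as above is what produces the two-type suffix in the
mangled intrinsic names — result type first, then source operand type. The
concrete declarations below are illustrative.]

.. code-block:: llvm

    declare <8 x i16> @llvm.vp.trunc.v8i16.v8i32(<8 x i32>, <8 x i1>, i32)
    declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)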
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5600,6 +5600,80 @@
            "VP cast intrinsic first argument and result vector lengths must be "
            "equal",
            *VPCast);
+
+    switch (VPCast->getIntrinsicID()) {
+    default:
+      llvm_unreachable("Unknown VP cast intrinsic");
+    case Intrinsic::vp_trunc:
+      Assert(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
+             "llvm.vp.trunc intrinsic first argument and result element type "
+             "must be integer",
+             *VPCast);
+      Assert(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
+             "llvm.vp.trunc intrinsic the bit size of first argument must be "
+             "larger than the bit size of the return type",
+             *VPCast);
+      break;
+    case Intrinsic::vp_zext:
+    case Intrinsic::vp_sext:
+      Assert(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
+             "llvm.vp.zext or llvm.vp.sext intrinsic first argument and result "
+             "element type must be integer",
+             *VPCast);
+      Assert(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
+             "llvm.vp.zext or llvm.vp.sext intrinsic the bit size of first "
+             "argument must be smaller than the bit size of the return type",
+             *VPCast);
+      break;
+    case Intrinsic::vp_fptoui:
+    case Intrinsic::vp_fptosi:
+      Assert(
+          RetTy->isIntOrIntVectorTy() && ValTy->isFPOrFPVectorTy(),
+          "llvm.vp.fptoui or llvm.vp.fptosi intrinsic first argument element "
+          "type must be floating-point and result element type must be integer",
+          *VPCast);
+      break;
+    case Intrinsic::vp_uitofp:
+    case Intrinsic::vp_sitofp:
+      Assert(
+          RetTy->isFPOrFPVectorTy() && ValTy->isIntOrIntVectorTy(),
+          "llvm.vp.uitofp or llvm.vp.sitofp intrinsic first argument element "
+          "type must be integer and result element type must be floating-point",
+          *VPCast);
+      break;
+    case Intrinsic::vp_fptrunc:
+      Assert(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
+             "llvm.vp.fptrunc intrinsic first argument and result element type "
+             "must be floating-point",
+             *VPCast);
+      Assert(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
+             "llvm.vp.fptrunc intrinsic the bit size of first argument must be "
+             "larger than the bit size of the return type",
+             *VPCast);
+      break;
+    case Intrinsic::vp_fpext:
+      Assert(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
+             "llvm.vp.fpext intrinsic first argument and result element type "
+             "must be floating-point",
+             *VPCast);
+      Assert(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
+             "llvm.vp.fpext intrinsic the bit size of first argument must be "
+             "smaller than the bit size of the return type",
+             *VPCast);
+      break;
+    case Intrinsic::vp_ptrtoint:
+      Assert(RetTy->isIntOrIntVectorTy() && ValTy->isPtrOrPtrVectorTy(),
+             "llvm.vp.ptrtoint intrinsic first argument element type must be "
+             "pointer and result element type must be integer",
+             *VPCast);
+      break;
+    case Intrinsic::vp_inttoptr:
+      Assert(RetTy->isPtrOrPtrVectorTy() && ValTy->isIntOrIntVectorTy(),
+             "llvm.vp.inttoptr intrinsic first argument element type must be "
+             "integer and result element type must be pointer",
+             *VPCast);
+      break;
+    }
   }
   if (VPI.getIntrinsicID() == Intrinsic::vp_fcmp) {
     auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
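[Reviewer note, not part of the patch: a sketch of IR the new checks act on.
The rejected call is shown only as a comment, since a module containing it
would no longer pass the verifier.]

.. code-block:: llvm

    declare <4 x i16> @llvm.vp.trunc.v4i16.v4i32(<4 x i32>, <4 x i1>, i32)

    define <4 x i16> @ok(<4 x i32> %a, <4 x i1> %m, i32 %evl) {
      ; accepted: i32 source elements are wider than the i16 result elements
      %r = call <4 x i16> @llvm.vp.trunc.v4i16.v4i32(<4 x i32> %a, <4 x i1> %m, i32 %evl)
      ret <4 x i16> %r
    }

    ; rejected ("... bit size of first argument must be larger than the bit
    ; size of the return type"):
    ;   %bad = call <4 x i64> @llvm.vp.trunc.v4i64.v4i32(<4 x i32> %a, <4 x i1> %m, i32 %evl)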
diff --git a/llvm/test/Verifier/vp-intrinsics.ll b/llvm/test/Verifier/vp-intrinsics.ll
--- a/llvm/test/Verifier/vp-intrinsics.ll
+++ b/llvm/test/Verifier/vp-intrinsics.ll
@@ -57,9 +57,18 @@
   ret void
 }
 
-define void @test_vp_int_fp_conversions(<8 x i32> %i0, <8 x float> %f0, <8 x i1> %mask, i32 %evl) {
-  %r0 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
+define void @test_vp_conversions(<8 x i32*> %p0, <8 x i32> %i0, <8 x i64> %i1, <8 x float> %f0, <8 x double> %f1, <8 x i1> %mask, i32 %evl) {
+  %r0 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> %f0, <8 x i1> %mask, i32 %evl)
   %r1 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> %f0, <8 x i1> %mask, i32 %evl)
+  %r2 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
+  %r3 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
+  %r4 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %f1, <8 x i1> %mask, i32 %evl)
+  %r5 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %f0, <8 x i1> %mask, i32 %evl)
+  %r6 = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %i1, <8 x i1> %mask, i32 %evl)
+  %r7 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
+  %r8 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
+  %r9 = call <8 x i32> @llvm.vp.ptrtoint.v8i32.v8p0i32(<8 x i32*> %p0, <8 x i1> %mask, i32 %evl)
+  %r10 = call <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
   ret void
 }
 
@@ -105,8 +114,17 @@
 declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32)
 declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32)
 ; casts
-declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
+declare <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float>, <8 x i1>, i32)
 declare <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float>, <8 x i1>, i32)
+declare <8 x float> @llvm.vp.uitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
+declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
+declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32)
+declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32)
+declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32)
+declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32)
+declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32)
+declare <8 x i32> @llvm.vp.ptrtoint.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32)
+declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i32(<8 x i32>, <8 x i1>, i32)
 ; compares
 declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32)
 declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -92,10 +92,28 @@
   Str << " declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x "
          "i32>, <8 x i32>, i32, <8 x i1>, i32, i32) ";
 
+  Str << " declare <8 x i32> @llvm.vp.fptoui.v8i32"
+      << ".v8f32(<8 x float>, <8 x i1>, i32) ";
   Str << " declare <8 x i32> @llvm.vp.fptosi.v8i32"
       << ".v8f32(<8 x float>, <8 x i1>, i32) ";
+  Str << " declare <8 x float> @llvm.vp.uitofp.v8f32"
+      << ".v8i32(<8 x i32>, <8 x i1>, i32) ";
   Str << " declare <8 x float> @llvm.vp.sitofp.v8f32"
      << ".v8i32(<8 x i32>, <8 x i1>, i32) ";
+  Str << " declare <8 x float> @llvm.vp.fptrunc.v8f32"
+      << ".v8f64(<8 x double>, <8 x i1>, i32) ";
+  Str << " declare <8 x double> @llvm.vp.fpext.v8f64"
+      << ".v8f32(<8 x float>, <8 x i1>, i32) ";
+  Str << " declare <8 x i32> @llvm.vp.trunc.v8i32"
+      << ".v8i64(<8 x i64>, <8 x i1>, i32) ";
+  Str << " declare <8 x i64> @llvm.vp.zext.v8i64"
+      << ".v8i32(<8 x i32>, <8 x i1>, i32) ";
+  Str << " declare <8 x i64> @llvm.vp.sext.v8i64"
+      << ".v8i32(<8 x i32>, <8 x i1>, i32) ";
+  Str << " declare <8 x i32> @llvm.vp.ptrtoint.v8i32"
+      << ".v8p0i32(<8 x i32*>, <8 x i1>, i32) ";
+  Str << " declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32"
+      << ".v8i32(<8 x i32>, <8 x i1>, i32) ";
   Str << " declare <8 x i1> @llvm.vp.fcmp.v8f32"
       << "(<8 x float>, <8 x float>, metadata, <8 x i1>, i32) ";