diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -20283,6 +20283,64 @@ %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef +.. _int_vp_fcmp: + +'``llvm.vp.fcmp.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <16 x i1> @llvm.vp.fcmp.v16f32(<16 x float> <left_op>, <16 x float> <right_op>, metadata <condition code>, <16 x i1> <mask>, i32 <vector_length>) + declare <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f32(<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, metadata <condition code>, <vscale x 4 x i1> <mask>, i32 <vector_length>) + declare <256 x i1> @llvm.vp.fcmp.v256f64(<256 x double> <left_op>, <256 x double> <right_op>, metadata <condition code>, <256 x i1> <mask>, i32 <vector_length>) + +Overview: +""""""""" + +The '``llvm.vp.fcmp``' intrinsic returns a vector of boolean values based on +the comparison of its operands. The operation has a mask and an explicit vector +length parameter. + + +Arguments: +"""""""""" + +The '``llvm.vp.fcmp``' intrinsic takes the two values to compare as its first +and second operands. These two values must be vectors of :ref:`floating-point +<t_floating>` types. +The return type is the result of the comparison. The return type must be a +vector of :ref:`i1 <t_integer>` type. The fourth operand is the vector mask. +The return type, the values to compare, and the vector mask have the same +number of elements. The third operand is the condition code indicating the kind +of comparison to perform. It must be a metadata string with :ref:`one of the +supported floating-point condition code values <fcmp_md_cc>`. The fifth operand +is the explicit vector length of the operation. + +Semantics: +"""""""""" + +The '``llvm.vp.fcmp``' compares its first two operands according to the +condition code given as the third operand. The operands are compared element by +element on each enabled lane, where the semantics of the comparison are +defined :ref:`according to the condition code <fcmp_md_cc_sem>`. Masked-off +lanes are undefined. + +Examples: +""""""""" + +.. 
code-block:: llvm + + %r = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"oeq", <4 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r + + %t = fcmp oeq <4 x float> %a, %b + %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef + + .. _int_mload_mstore: Masked Vector Load and Store Intrinsics @@ -21423,6 +21481,8 @@ The third argument is the condition code indicating the kind of comparison to perform. It must be a metadata string with one of the following values: +.. _fcmp_md_cc: + - "``oeq``": ordered and equal - "``ogt``": ordered and greater than - "``oge``": ordered and greater than or equal @@ -21451,6 +21511,8 @@ vectors are compared element by element. Each comparison performed always yields an :ref:`i1 <t_integer>` result, as follows: +.. _fcmp_md_cc_sem: + - "``oeq``": yields ``true`` if both operands are not a NAN and ``op1`` is equal to ``op2``. - "``ogt``": yields ``true`` if both operands are not a NAN and ``op1`` diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -488,6 +488,23 @@ /// @} }; +class VPCmpIntrinsic : public VPIntrinsic { +public: + static bool isVPCmp(Intrinsic::ID ID); + + CmpInst::Predicate getPredicate() const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// @{ + static bool classof(const IntrinsicInst *I) { + return VPCmpIntrinsic::isVPCmp(I->getIntrinsicID()); + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + /// @} +}; + /// This is the common base class for constrained floating point intrinsics. 
class ConstrainedFPIntrinsic : public IntrinsicInst { public: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1567,6 +1567,16 @@ LLVMMatchType<0>, llvm_i32_ty]>; +// Comparisons. +let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { + def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyvector_ty, + LLVMMatchType<0>, + llvm_metadata_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +} + // Reductions let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -126,6 +126,13 @@ #define VP_PROPERTY_CASTOP #endif +// This VP Intrinsic is a comparison operation +// The condition code arg is at CCPOS and accepts floating-point condition +// codes if ISFP is set, else it accepts integer condition codes. 
+#ifndef VP_PROPERTY_CMP +#define VP_PROPERTY_CMP(CCPOS, ISFP) +#endif + /// } Property Macros ///// Integer Arithmetic { @@ -252,6 +259,16 @@ ///// } Type Casts +///// Comparisons { +// llvm.vp.fcmp(x,y,cc,mask,vlen) +BEGIN_REGISTER_VP(vp_fcmp, 3, 4, VP_FCMP, -1) +VP_PROPERTY_FUNCTIONAL_OPC(FCmp) +VP_PROPERTY_CMP(2, true) +VP_PROPERTY_CONSTRAINEDFP(0, 1, experimental_constrained_fcmp) +END_REGISTER_VP(vp_fcmp, VP_FCMP) + +///// } Comparisons + ///// Memory Operations { // llvm.vp.store(val,ptr,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3) @@ -424,6 +441,7 @@ #undef END_REGISTER_VP_SDNODE #undef VP_PROPERTY_BINARYOP #undef VP_PROPERTY_CASTOP +#undef VP_PROPERTY_CMP #undef VP_PROPERTY_CONSTRAINEDFP #undef VP_PROPERTY_FUNCTIONAL_INTRINSIC #undef VP_PROPERTY_FUNCTIONAL_OPC diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -236,8 +236,8 @@ return true; } -FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const { - Metadata *MD = cast<MetadataAsValue>(getArgOperand(2))->getMetadata(); +static FCmpInst::Predicate getFPPredicateFromMD(const Value *Op) { + Metadata *MD = cast<MetadataAsValue>(Op)->getMetadata(); if (!MD || !isa<MDString>(MD)) return FCmpInst::BAD_FCMP_PREDICATE; return StringSwitch<FCmpInst::Predicate>(cast<MDString>(MD)->getString()) @@ -258,6 +258,10 @@ .Default(FCmpInst::BAD_FCMP_PREDICATE); } +FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const { + return getFPPredicateFromMD(getArgOperand(2)); +} + bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: @@ -560,6 +564,37 @@ return false; } +bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) { + switch (ID) { + default: + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_CMP(CCPOS, ...) 
return true; +#define END_REGISTER_VP_INTRINSIC(VPID) break; +#include "llvm/IR/VPIntrinsics.def" + } + return false; +} + +CmpInst::Predicate VPCmpIntrinsic::getPredicate() const { + bool IsFP = true; + Optional<unsigned> CCArgIdx; + switch (getIntrinsicID()) { + default: + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_CMP(CCPOS, ISFP) \ + CCArgIdx = CCPOS; \ + IsFP = ISFP; \ + break; +#define END_REGISTER_VP_INTRINSIC(VPID) break; +#include "llvm/IR/VPIntrinsics.def" + } + assert(CCArgIdx.hasValue() && IsFP && + "Unexpected vector-predicated comparison"); + return getFPPredicateFromMD(getArgOperand(*CCArgIdx)); +} + unsigned VPReductionIntrinsic::getVectorParamPos() const { return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID()); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5601,6 +5601,11 @@ "equal", *VPCast); } + if (VPI.getIntrinsicID() == Intrinsic::vp_fcmp) { + auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate(); + Assert(CmpInst::isFPPredicate(Pred), + "invalid predicate for VP FP comparison intrinsic", &VPI); + } } void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { diff --git a/llvm/test/Verifier/invalid-vp-intrinsics.ll b/llvm/test/Verifier/invalid-vp-intrinsics.ll --- a/llvm/test/Verifier/invalid-vp-intrinsics.ll +++ b/llvm/test/Verifier/invalid-vp-intrinsics.ll @@ -1,6 +1,7 @@ ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s -declare <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32); +declare <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32) +declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32) ; CHECK: VP cast intrinsic first argument and result vector lengths must be equal ; CHECK-NEXT: %r0 = call <4 x i32> @@ -9,3 +10,14 @@ %r0 = call <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float> %src, <4 x i1> %m, i32 %n) ret void } + +; 
CHECK: invalid predicate for VP FP comparison intrinsic +; CHECK-NEXT: %r0 = call <4 x i1> @llvm.vp.fcmp.v4f32 +; CHECK: invalid predicate for VP FP comparison intrinsic +; CHECK-NEXT: %r1 = call <4 x i1> @llvm.vp.fcmp.v4f32 + +define void @test_vp_fcmp(<4 x float> %a, <4 x float> %b, <4 x i1> %m, i32 %n) { + %r0 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"bad", <4 x i1> %m, i32 %n) + %r1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"eq", <4 x i1> %m, i32 %n) + ret void +} diff --git a/llvm/test/Verifier/vp-intrinsics.ll b/llvm/test/Verifier/vp-intrinsics.ll --- a/llvm/test/Verifier/vp-intrinsics.ll +++ b/llvm/test/Verifier/vp-intrinsics.ll @@ -63,6 +63,11 @@ ret void } +define void @test_vp_comparisons(<8 x float> %f0, <8 x float> %f1, <8 x i1> %mask, i32 %evl) { + %r0 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %mask, i32 %evl) + ret void +} + ; integer arith declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) @@ -101,6 +106,8 @@ ; casts declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32) declare <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float>, <8 x i1>, i32) +; compares +declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32) ; shuffles declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32>, <8 x i32>, i32, <8 x i1>, i32, i32) declare <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, i32, i32) diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -97,6 +97,9 @@ Str << " declare <8 x float> @llvm.vp.sitofp.v8f32" << ".v8i32(<8 x i32>, <8 x i1>, i32) "; + Str << " declare <8 x i1> @llvm.vp.fcmp.v8f32" << "(<8 x float>, <8 x float>, metadata, <8 x i1>, i32) "; + 
return parseAssemblyString(Str.str(), Err, C); } }; @@ -314,7 +317,7 @@ } /// Check that the HANDLE_VP_TO_CONSTRAINEDFP maps to an existing intrinsic with -/// the right amount of metadata args. +/// the right amount of constrained-fp metadata args. TEST_F(VPIntrinsicTest, HandleToConstrainedFP) { #define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, CFPID) \ { \ @@ -323,7 +326,8 @@ unsigned NumMetadataArgs = 0; \ for (auto TD : T) \ NumMetadataArgs += (TD.Kind == Intrinsic::IITDescriptor::Metadata); \ - ASSERT_EQ(NumMetadataArgs, (unsigned)(HASROUND + HASEXCEPT)); \ + bool IsCmp = Intrinsic::CFPID == Intrinsic::experimental_constrained_fcmp; \ + ASSERT_EQ(NumMetadataArgs, (unsigned)(IsCmp + HASROUND + HASEXCEPT)); \ } #include "llvm/IR/VPIntrinsics.def" }