Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -15623,6 +15623,113 @@ than the operand. All restrictions that apply to the fpext instruction also apply to this intrinsic. +'``llvm.experimental.constrained.fcmp``' and '``llvm.experimental.constrained.fcmps``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <ty2> + @llvm.experimental.constrained.fcmp(<type> <op1>, <type> <op2>, + metadata <condition code>, + metadata <exception behavior>) + declare <ty2> + @llvm.experimental.constrained.fcmps(<type> <op1>, <type> <op2>, + metadata <condition code>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fcmp``' and +'``llvm.experimental.constrained.fcmps``' intrinsics return a boolean +value or vector of boolean values based on comparison of their operands. + +If the operands are floating-point scalars, then the result type is a +boolean (:ref:`i1 <t_integer>`). + +If the operands are floating-point vectors, then the result type is a +vector of boolean with the same number of elements as the operands being +compared. + +The '``llvm.experimental.constrained.fcmp``' intrinsic performs a quiet +comparison operation while the '``llvm.experimental.constrained.fcmps``' +intrinsic performs a signaling comparison operation. + +Arguments: +"""""""""" + +The first two arguments to the '``llvm.experimental.constrained.fcmp``' +and '``llvm.experimental.constrained.fcmps``' intrinsics must be +:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` +of floating-point values. Both arguments must have identical types. + +The third argument is the condition code indicating the kind of comparison +to perform. 
It must be a metadata string with one of the following values: + +- "``oeq``": ordered and equal +- "``ogt``": ordered and greater than +- "``oge``": ordered and greater than or equal +- "``olt``": ordered and less than +- "``ole``": ordered and less than or equal +- "``one``": ordered and not equal +- "``ord``": ordered (no nans) +- "``ueq``": unordered or equal +- "``ugt``": unordered or greater than +- "``uge``": unordered or greater than or equal +- "``ult``": unordered or less than +- "``ule``": unordered or less than or equal +- "``une``": unordered or not equal +- "``uno``": unordered (either nans) + +*Ordered* means that neither operand is a NAN while *unordered* means +that either operand may be a NAN. + +The fourth argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +``op1`` and ``op2`` are compared according to the condition code given +as the third argument. If the operands are vectors, then the +vectors are compared element by element. Each comparison performed +always yields an :ref:`i1 <t_integer>` result, as follows: + +- "``oeq``": yields ``true`` if both operands are not a NAN and ``op1`` + is equal to ``op2``. +- "``ogt``": yields ``true`` if both operands are not a NAN and ``op1`` + is greater than ``op2``. +- "``oge``": yields ``true`` if both operands are not a NAN and ``op1`` + is greater than or equal to ``op2``. +- "``olt``": yields ``true`` if both operands are not a NAN and ``op1`` + is less than ``op2``. +- "``ole``": yields ``true`` if both operands are not a NAN and ``op1`` + is less than or equal to ``op2``. +- "``one``": yields ``true`` if both operands are not a NAN and ``op1`` + is not equal to ``op2``. +- "``ord``": yields ``true`` if both operands are not a NAN. +- "``ueq``": yields ``true`` if either operand is a NAN or ``op1`` is + equal to ``op2``. +- "``ugt``": yields ``true`` if either operand is a NAN or ``op1`` is + greater than ``op2``. 
+- "``uge``": yields ``true`` if either operand is a NAN or ``op1`` is + greater than or equal to ``op2``. +- "``ult``": yields ``true`` if either operand is a NAN or ``op1`` is + less than ``op2``. +- "``ule``": yields ``true`` if either operand is a NAN or ``op1`` is + less than or equal to ``op2``. +- "``une``": yields ``true`` if either operand is a NAN or ``op1`` is + not equal to ``op2``. +- "``uno``": yields ``true`` if either operand is a NAN. + +The quiet comparison operation performed by +'``llvm.experimental.constrained.fcmp``' will only raise an exception +if either operand is a SNAN. The signaling comparison operation +performed by '``llvm.experimental.constrained.fcmps``' will raise an +exception if either operand is a NAN (QNAN or SNAN). + Constrained libm-equivalent Intrinsics -------------------------------------- Index: llvm/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -330,6 +330,12 @@ /// It is used to limit optimizations while the DAG is being optimized. STRICT_FP_EXTEND, + /// STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used + /// for floating-point operands only. STRICT_FSETCC performs a quiet + /// comparison operation, while STRICT_FSETCCS performs a signaling + /// comparison operation. + STRICT_FSETCC, STRICT_FSETCCS, + /// FMA - Perform a * b + c with no intermediate rounding step. 
FMA, Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -951,6 +951,8 @@ default: llvm_unreachable("Unexpected FP pseudo-opcode"); #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; #include "llvm/IR/ConstrainedOps.def" } Index: llvm/include/llvm/IR/ConstrainedOps.def =================================================================== --- llvm/include/llvm/IR/ConstrainedOps.def +++ llvm/include/llvm/IR/ConstrainedOps.def @@ -20,6 +20,11 @@ #define FUNCTION INSTRUCTION #endif +// Likewise for compare instructions. +#ifndef CMP_INSTRUCTION +#define CMP_INSTRUCTION INSTRUCTION +#endif + // Arguments of the entries are: // - instruction or intrinsic function name. // - Number of original instruction/intrinsic arguments. @@ -40,6 +45,11 @@ INSTRUCTION(FPToUI, 1, 0, experimental_constrained_fptoui, FP_TO_UINT) INSTRUCTION(FPTrunc, 1, 1, experimental_constrained_fptrunc, FP_ROUND) +// These are definitions for compare instructions (signaling and quiet version). +// Both of these match to FCmp / SETCC. +CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmp, FSETCC) +CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS) + // Theses are definitions for intrinsic functions, that are converted into // constrained intrinsics. // @@ -69,3 +79,4 @@ #undef INSTRUCTION #undef FUNCTION +#undef CMP_INSTRUCTION Index: llvm/include/llvm/IR/IntrinsicInst.h =================================================================== --- llvm/include/llvm/IR/IntrinsicInst.h +++ llvm/include/llvm/IR/IntrinsicInst.h @@ -221,6 +221,25 @@ } }; + /// Constrained floating point compare intrinsics. 
+ class ConstrainedFPCmpIntrinsic : public ConstrainedFPIntrinsic { + public: + FCmpInst::Predicate getPredicate() const; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: + return true; + default: return false; + } + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + }; + /// This class represents an intrinsic that is based on a binary operation. /// This includes op.with.overflow and saturating add/sub intrinsics. class BinaryOpIntrinsic : public IntrinsicInst { Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -743,8 +743,18 @@ [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + + // Constrained floating-point comparison (quiet and signaling variants). + // Third operand is the predicate represented as a metadata string. + def int_experimental_constrained_fcmp + : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyfloat_ty, LLVMMatchType<0>, + llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fcmps + : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyfloat_ty, LLVMMatchType<0>, + llvm_metadata_ty, llvm_metadata_ty ]>; } -// FIXME: Add intrinsic for fcmp. // FIXME: Consider maybe adding intrinsics for sitofp, uitofp. 
//===------------------------- Expect Intrinsics --------------------------===// Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1036,11 +1036,17 @@ Node->getOperand(2).getValueType()); break; case ISD::SELECT_CC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 3 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 : Node->getOpcode() == ISD::SETCC ? 2 : 1; - unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 1 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast(Node->getOperand(CCOperand))->get(); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -75,6 +75,8 @@ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; @@ -816,7 +818,8 @@ } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT InVT = N->getOperand(0).getValueType(); + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; + EVT InVT = N->getOperand(OpNo).getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT SVT = getSetCCResultType(InVT); @@ -835,12 +838,22 @@ } SDLoc dl(N); - assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() && "Vector compare must return a vector result!"); // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC; + if (N->isStrictFPOpcode()) { + EVT VTs[] = {SVT, MVT::Other}; + SDValue Opers[] = {N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)}; + SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1)); + } else + SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. 
return DAG.getSExtOrTrunc(SetCC, dl, NVT); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1323,7 +1323,14 @@ unsigned NumElems = VT.getVectorNumElements(); unsigned NumOpers = Op.getNumOperands(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ValueVTs[] = {EltVT, MVT::Other}; + + EVT TmpEltVT = EltVT; + if (Op->getOpcode() == ISD::STRICT_FSETCC || + Op->getOpcode() == ISD::STRICT_FSETCCS) + TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT); + + EVT ValueVTs[] = {TmpEltVT, MVT::Other}; SDValue Chain = Op.getOperand(0); SDLoc dl(Op); @@ -1350,9 +1357,18 @@ } SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarResult = ScalarOp.getValue(0); + SDValue ScalarChain = ScalarOp.getValue(1); + + if (Op->getOpcode() == ISD::STRICT_FSETCC || + Op->getOpcode() == ISD::STRICT_FSETCCS) + ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); - OpValues.push_back(ScalarOp.getValue(0)); - OpChains.push_back(ScalarOp.getValue(1)); + OpValues.push_back(ScalarResult); + OpChains.push_back(ScalarChain); } SDValue Result = DAG.getBuildVector(VT, dl, OpValues); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3749,6 +3749,17 @@ WidenVT, N->getOperand(0)); } +// Return true is this is a SETCC node or a strict version of it. 
+static inline bool isSETCCOp(unsigned Opcode) { + switch (Opcode) { + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + return true; + } + return false; +} + // Return true if this is a node that could have two SETCCs as operands. static inline bool isLogicalMaskOp(unsigned Opcode) { switch (Opcode) { @@ -3760,6 +3771,13 @@ return false; } +// If N is a SETCC or a strict variant of it, return the type +// of the compare operands. +static inline EVT getSETCCOperandType(SDValue N) { + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + return N->getOperand(OpNo).getValueType(); +} + // This is used just for the assert in convertMask(). Check that this either // a SETCC or a previously handled SETCC by convertMask(). #ifndef NDEBUG @@ -3782,7 +3800,7 @@ return isSETCCorConvertedSETCC(N.getOperand(0)) && isSETCCorConvertedSETCC(N.getOperand(1)); - return (N.getOpcode() == ISD::SETCC || + return (isSETCCOp(N.getOpcode()) || ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -3797,10 +3815,17 @@ assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. + SDValue Mask; SmallVector Ops; for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); + if (InMask->isStrictFPOpcode()) { + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), + { MaskVT, MVT::Other }, Ops); + ReplaceValueWith(InMask.getValue(1), Mask.getValue(1)); + } + else + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. 
@@ -3853,7 +3878,7 @@ if (N->getOpcode() != ISD::VSELECT) return SDValue(); - if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode())) + if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode())) return SDValue(); // If this is a splitted VSELECT that was previously already handled, do @@ -3876,8 +3901,8 @@ return SDValue(); // If there is support for an i1 vector mask, don't touch. - if (Cond.getOpcode() == ISD::SETCC) { - EVT SetCCOpVT = Cond->getOperand(0).getValueType(); + if (isSETCCOp(Cond.getOpcode())) { + EVT SetCCOpVT = getSETCCOperandType(Cond); while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal) SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT); EVT SetCCResVT = getSetCCResultType(SetCCOpVT); @@ -3908,17 +3933,17 @@ ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); SDValue Mask; - if (Cond->getOpcode() == ISD::SETCC) { - EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType()); + if (isSETCCOp(Cond->getOpcode())) { + EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond)); Mask = convertMask(Cond, MaskVT, ToMaskVT); } else if (isLogicalMaskOp(Cond->getOpcode()) && - Cond->getOperand(0).getOpcode() == ISD::SETCC && - Cond->getOperand(1).getOpcode() == ISD::SETCC) { + isSETCCOp(Cond->getOperand(0).getOpcode()) && + isSETCCOp(Cond->getOperand(1).getOpcode())) { // Cond is (AND/OR/XOR (SETCC, SETCC)) SDValue SETCC0 = Cond->getOperand(0); SDValue SETCC1 = Cond->getOperand(1); - EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType()); - EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType()); + EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0)); + EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1)); unsigned ScalarBits0 = VT0.getScalarSizeInBits(); unsigned ScalarBits1 = VT1.getScalarSizeInBits(); unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
=================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2798,12 +2798,16 @@ Known.Zero.setBitsFrom(1); break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If we know the result of a setcc has the top bits zero, use this info. - if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) Known.Zero.setBitsFrom(1); break; + } case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); @@ -3662,11 +3666,15 @@ return VTBits; break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If setcc returns 0/-1, all bits are sign bits. 
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; + } case ISD::ROTL: case ISD::ROTR: if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { @@ -7766,6 +7774,8 @@ llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; #include "llvm/IR/ConstrainedOps.def" } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6909,9 +6909,21 @@ #include "llvm/IR/ConstrainedOps.def" } - if (Opcode == ISD::STRICT_FP_ROUND) + // A few strict DAG nodes carry additional operands that are not + // set up by the default code above. 
+ switch (Opcode) { + default: break; + case ISD::STRICT_FP_ROUND: Opers.push_back( DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); + break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + auto *FPCmp = dyn_cast(&FPI); + Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate()))); + break; + } + } SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -270,6 +270,8 @@ case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCCARRY: return "setcccarry"; + case ISD::STRICT_FSETCC: return "strict_fsetcc"; + case ISD::STRICT_FSETCCS: return "strict_fsetccs"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; Index: llvm/lib/IR/IntrinsicInst.cpp =================================================================== --- llvm/lib/IR/IntrinsicInst.cpp +++ llvm/lib/IR/IntrinsicInst.cpp @@ -121,6 +121,30 @@ return StrToExceptionBehavior(cast(MD)->getString()); } +FCmpInst::Predicate +ConstrainedFPCmpIntrinsic::getPredicate() const { + Metadata *MD = + cast(getArgOperand(2))->getMetadata(); + if (!MD || !isa(MD)) + return FCmpInst::BAD_FCMP_PREDICATE; + return StringSwitch(cast(MD)->getString()) + .Case("oeq", FCmpInst::FCMP_OEQ) + .Case("ogt", FCmpInst::FCMP_OGT) + .Case("oge", FCmpInst::FCMP_OGE) + .Case("olt", FCmpInst::FCMP_OLT) + .Case("ole", FCmpInst::FCMP_OLE) + .Case("one", FCmpInst::FCMP_ONE) + .Case("ord", FCmpInst::FCMP_ORD) + .Case("uno", FCmpInst::FCMP_UNO) + .Case("ueq", FCmpInst::FCMP_UEQ) + .Case("ugt", FCmpInst::FCMP_UGT) + .Case("uge", FCmpInst::FCMP_UGE) + .Case("ult", FCmpInst::FCMP_ULT) + .Case("ule", FCmpInst::FCMP_ULE) + .Case("une", 
FCmpInst::FCMP_UNE) + .Default(FCmpInst::BAD_FCMP_PREDICATE); +} + bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -4740,6 +4740,9 @@ llvm_unreachable("Invalid constrained FP intrinsic!"); } NumOperands += (1 + HasRoundingMD); + // Compare intrinsics carry an extra predicate metadata operand. + if (isa(FPI)) + NumOperands += 1; Assert((FPI.getNumArgOperands() == NumOperands), "invalid arguments for constrained FP intrinsic", &FPI); @@ -4762,6 +4765,14 @@ break; } + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: { + auto Pred = dyn_cast(&FPI)->getPredicate(); + Assert(CmpInst::isFPPredicate(Pred), + "invalid predicate for constrained FP comparison intrinsic", &FPI); + break; + } + case Intrinsic::experimental_constrained_fptosi: case Intrinsic::experimental_constrained_fptoui: { Value *Operand = FPI.getArgOperand(0); Index: llvm/lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -295,6 +295,11 @@ MIB.setMemRefs(MI.memoperands()); MI.eraseFromParent(); + // Mark instruction as raising an FP exception if applicable. We already + // verified earlier that this move is valid. + if (Compare.mayRaiseFPException()) + MIB.setMIFlag(MachineInstr::MIFlag::FPExcept); + return true; } @@ -312,6 +317,18 @@ const MCInstrDesc &Desc = TII->get(Opcode); unsigned MIFlags = Desc.TSFlags; + // If Compare may raise an FP exception, we can only eliminate it + // if MI itself would have already raised the exception. 
+ if (Compare.mayRaiseFPException()) { + // If the caller will change MI to use ConvOpc, only test whether + // ConvOpc is suitable; it is on the caller to set the MI flag. + if (ConvOpc && !Desc.mayRaiseFPException()) + return false; + // If the caller will not change MI, we test the MI flag here. + if (!ConvOpc && !MI.mayRaiseFPException()) + return false; + } + // See which compare-style condition codes are available. unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); @@ -454,6 +471,12 @@ CCRefs |= getRegReferences(MI, SystemZ::CC); if (CCRefs.Use && CCRefs.Def) break; + // Eliminating a Compare that may raise an FP exception will move + // raising the exception to some earlier MI. We cannot do this if + // there is anything in between that might change exception flags. + if (Compare.mayRaiseFPException() && + (MI.isCall() || MI.hasUnmodeledSideEffects())) + break; } // Also do a forward search to handle cases where an instruction after the Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -58,7 +58,8 @@ ICMP, // Floating-point comparisons. The two operands are the values to compare. - FCMP, + // Regular and strict (quiet and signaling) versions. + FCMP, STRICT_FCMP, STRICT_FCMPS, // Test under mask. The first operand is ANDed with the second operand // and the condition codes are set on the result. The third operand is @@ -248,9 +249,10 @@ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and // greater than" and VFCMPHE for "ordered and greater than or equal to". - VFCMPE, - VFCMPH, - VFCMPHE, + // Regular and strict (quiet and signaling) versions. 
+ VFCMPE, STRICT_VFCMPE, STRICT_VFCMPES, + VFCMPH, STRICT_VFCMPH, STRICT_VFCMPHS, + VFCMPHE, STRICT_VFCMPHE, STRICT_VFCMPHES, // Likewise, but also set the condition codes on the result. VFCMPES, @@ -261,8 +263,8 @@ VFTCI, // Extend the even f32 elements of vector operand 0 to produce a vector - // of f64 elements. - VEXTEND, + // of f64 elements. Regular and strict versions. + VEXTEND, STRICT_VEXTEND, // Round the f64 elements of vector operand 0 to f32s and store them in the // even elements of the result. @@ -531,11 +533,15 @@ // Implement LowerOperation for individual opcodes. SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue CmpOp0, SDValue CmpOp1) const; + SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const; SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, - SDValue CmpOp0, SDValue CmpOp1) const; + SDValue CmpOp0, SDValue CmpOp1, + SDValue Chain = SDValue(), + bool IsSignaling = false) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG, + bool IsSignaling) const; SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -32,12 +32,16 @@ namespace { // Represents information about a comparison. struct Comparison { - Comparison(SDValue Op0In, SDValue Op1In) - : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} + Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) + : Op0(Op0In), Op1(Op1In), Chain(ChainIn), + Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} // The operands to the comparison. 
SDValue Op0, Op1; + // Chain if this is a strict floating-point comparison. + SDValue Chain; + // The opcode that should be used to compare Op0 and Op1. unsigned Opcode; @@ -132,6 +136,8 @@ if (isTypeLegal(VT)) { // Lower SET_CC into an IPM-based sequence. setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). setOperationAction(ISD::SELECT, VT, Expand); @@ -373,6 +379,9 @@ // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands // and inverting the result as necessary. setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); } } @@ -2168,6 +2177,10 @@ // negation to set CC, so avoiding separate LOAD AND TEST and // LOAD (NEGATIVE/COMPLEMENT) instructions. static void adjustForFNeg(Comparison &C) { + // This optimization is invalid for strict comparisons, since FNEG + // does not raise any exceptions. + if (C.Chain) + return; auto *C1 = dyn_cast(C.Op1); if (C1 && C1->isZero()) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -2455,7 +2468,7 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond) { - Comparison C(Call, SDValue()); + Comparison C(Call, SDValue(), SDValue()); C.Opcode = Opcode; C.CCValid = CCValid; if (Cond == ISD::SETEQ) @@ -2486,8 +2499,11 @@ // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. 
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond, const SDLoc &DL) { + ISD::CondCode Cond, const SDLoc &DL, + SDValue Chain = SDValue(), + bool IsSignaling = false) { if (CmpOp1.getOpcode() == ISD::Constant) { + assert(!Chain); uint64_t Constant = cast(CmpOp1)->getZExtValue(); unsigned Opcode, CCValid; if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && @@ -2499,13 +2515,19 @@ isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); } - Comparison C(CmpOp0, CmpOp1); + Comparison C(CmpOp0, CmpOp1, Chain); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { C.CCValid = SystemZ::CCMASK_FCMP; - C.Opcode = SystemZISD::FCMP; + if (!C.Chain) + C.Opcode = SystemZISD::FCMP; + else if (!IsSignaling) + C.Opcode = SystemZISD::STRICT_FCMP; + else + C.Opcode = SystemZISD::STRICT_FCMPS; adjustForFNeg(C); } else { + assert(!C.Chain); C.CCValid = SystemZ::CCMASK_ICMP; C.Opcode = SystemZISD::ICMP; // Choose the type of comparison. Equality and inequality tests can @@ -2563,6 +2585,10 @@ return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); } + if (C.Chain) { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); + } return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } @@ -2607,24 +2633,51 @@ } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot -// be done directly. IsFP is true if CC is for a floating-point rather than -// integer comparison. -static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { +// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP +// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) +// floating-point comparisons, and CmpMode::SignalingFP for strict signaling +// floating-point comparisons. 
+enum class CmpMode { Int, FP, StrictFP, SignalingFP };
+static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
   switch (CC) {
   case ISD::SETOEQ:
   case ISD::SETEQ:
-    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+    switch (Mode) {
+    case CmpMode::Int: return SystemZISD::VICMPE;
+    case CmpMode::FP: return SystemZISD::VFCMPE;
+    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
+    }
+    llvm_unreachable("Bad mode"); // No default: label, so -Wswitch stays on.

   case ISD::SETOGE:
   case ISD::SETGE:
-    return IsFP ? SystemZISD::VFCMPHE : static_cast(0);
+    switch (Mode) {
+    case CmpMode::Int: return 0;
+    case CmpMode::FP: return SystemZISD::VFCMPHE;
+    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
+    }
+    llvm_unreachable("Bad mode");

   case ISD::SETOGT:
   case ISD::SETGT:
-    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+    switch (Mode) {
+    case CmpMode::Int: return SystemZISD::VICMPH;
+    case CmpMode::FP: return SystemZISD::VFCMPH;
+    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
+    }
+    llvm_unreachable("Bad mode");

   case ISD::SETUGT:
-    return IsFP ? static_cast(0) : SystemZISD::VICMPHL;
+    switch (Mode) {
+    case CmpMode::Int: return SystemZISD::VICMPHL;
+    case CmpMode::FP: return 0;
+    case CmpMode::StrictFP: return 0;
+    case CmpMode::SignalingFP: return 0;
+    }
+    llvm_unreachable("Bad mode");

   default:
     return 0;
@@ -2633,17 +2686,16 @@

 // Return the SystemZISD vector comparison operation for CC or its inverse,
 // or 0 if neither can be done directly. Indicate in Invert whether the
-// result is for the inverse of CC. IsFP is true if CC is for a
-// floating-point rather than integer comparison.
-static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+// result is for the inverse of CC. Mode is as above.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert) { - if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = false; return Opcode; } - CC = ISD::getSetCCInverse(CC, !IsFP); - if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int); + if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = true; return Opcode; } @@ -2652,44 +2704,73 @@ } // Return a v2f64 that contains the extended form of elements Start and Start+1 -// of v4f32 value Op. +// of v4f32 value Op. If Chain is nonnull, return the strict form. static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, - SDValue Op) { + SDValue Op, SDValue Chain) { int Mask[] = { Start, -1, Start + 1, -1 }; Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); + return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); + } return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); } // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, -// producing a result of type VT. +// producing a result of type VT. If Chain is nonnull, return the strict form. SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, SDValue CmpOp0, - SDValue CmpOp1) const { + SDValue CmpOp1, + SDValue Chain) const { // There is no hardware support for v4f32 (unless we have the vector // enhancements facility 1), so extend the vector into two v2f64s // and compare those. 
if (CmpOp0.getValueType() == MVT::v4f32 && !Subtarget.hasVectorEnhancements1()) { - SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); - SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); - SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); - SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); + SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); + SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); + SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); + SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); + SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); + SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); + SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), + H1.getValue(1), L1.getValue(1), + HRes.getValue(1), LRes.getValue(1) }; + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue Ops[2] = { Res, NewChain }; + return DAG.getMergeValues(Ops, DL); + } SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); } + if (Chain) { + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); + } return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); } // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing -// an integer mask of type VT. +// an integer mask of type VT. If Chain is nonnull, we have a strict +// floating-point comparison. If in addition IsSignaling is true, we have +// a strict signaling floating-point comparison. 
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, - SDValue CmpOp1) const { + SDValue CmpOp1, + SDValue Chain, + bool IsSignaling) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); + assert (!Chain || IsFP); + assert (!IsSignaling || Chain); + CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : + Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; bool Invert = false; SDValue Cmp; switch (CC) { @@ -2699,9 +2780,14 @@ LLVM_FALLTHROUGH; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); - SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); - SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GE.getValue(1)); break; } @@ -2711,9 +2797,14 @@ LLVM_FALLTHROUGH; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); - SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); - SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GT.getValue(1)); break; } @@ -2721,15 +2812,17 @@ // matter whether we try the inversion or the swap first, since // there are no cases where both work. 
default: - if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) - Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); else { CC = ISD::getSetCCSwappedOperands(CC); - if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) - Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); else llvm_unreachable("Unhandled comparison"); } + if (Chain) + Chain = Cmp.getValue(1); break; } if (Invert) { @@ -2737,6 +2830,10 @@ DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } + if (Chain && Chain.getNode() != Cmp.getNode()) { + SDValue Ops[2] = { Cmp, Chain }; + Cmp = DAG.getMergeValues(Ops, DL); + } return Cmp; } @@ -2755,6 +2852,29 @@ return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); } +SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, + SelectionDAG &DAG, + bool IsSignaling) const { + SDValue Chain = Op.getOperand(0); + SDValue CmpOp0 = Op.getOperand(1); + SDValue CmpOp1 = Op.getOperand(2); + ISD::CondCode CC = cast(Op.getOperand(3))->get(); + SDLoc DL(Op); + EVT VT = Op.getNode()->getValueType(0); + if (VT.isVector()) { + SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, + Chain, IsSignaling); + return Res.getValue(Op.getResNo()); + } + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); + SDValue CCReg = emitCmp(DAG, DL, C); + CCReg->setFlags(Op->getFlags()); + SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); + SDValue Ops[2] = { Result, CCReg.getValue(1) }; + return DAG.getMergeValues(Ops, DL); +} + SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue 
CmpOp0 = Op.getOperand(2); @@ -4966,6 +5086,10 @@ return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); + case ISD::STRICT_FSETCC: + return lowerSTRICT_FSETCC(Op, DAG, false); + case ISD::STRICT_FSETCCS: + return lowerSTRICT_FSETCC(Op, DAG, true); case ISD::GlobalAddress: return lowerGlobalAddress(cast(Op), DAG); case ISD::GlobalTLSAddress: @@ -5171,6 +5295,8 @@ OPCODE(IABS); OPCODE(ICMP); OPCODE(FCMP); + OPCODE(STRICT_FCMP); + OPCODE(STRICT_FCMPS); OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); @@ -5233,13 +5359,20 @@ OPCODE(VICMPHS); OPCODE(VICMPHLS); OPCODE(VFCMPE); + OPCODE(STRICT_VFCMPE); + OPCODE(STRICT_VFCMPES); OPCODE(VFCMPH); + OPCODE(STRICT_VFCMPH); + OPCODE(STRICT_VFCMPHS); OPCODE(VFCMPHE); + OPCODE(STRICT_VFCMPHE); + OPCODE(STRICT_VFCMPHES); OPCODE(VFCMPES); OPCODE(VFCMPHS); OPCODE(VFCMPHES); OPCODE(VFTCI); OPCODE(VEXTEND); + OPCODE(STRICT_VEXTEND); OPCODE(VROUND); OPCODE(VTM); OPCODE(VFAE_CC); @@ -7554,7 +7687,8 @@ // Replace pseudo with a normal load-and-test that models the def as // well. 
BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) - .addReg(SrcReg); + .addReg(SrcReg) + .setMIFlags(MI.getFlags()); MI.eraseFromParent(); return MBB; Index: llvm/lib/Target/SystemZ/SystemZInstrFP.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -537,19 +537,19 @@ //===----------------------------------------------------------------------===// let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in { - def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>; - def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>; - def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>; + def CEBR : CompareRRE<"cebr", 0xB309, z_any_fcmp, FP32, FP32>; + def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>; + def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>; - def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; + def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>; - def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32, FP32>; - def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64, FP64>; - def KXBR : CompareRRE<"kxbr", 0xB348, null_frag, FP128, FP128>; + def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>; + def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>; + def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>; - def KEB : CompareRXE<"keb", 0xED08, null_frag, FP32, load, 4>; - def KDB : CompareRXE<"kdb", 0xED18, null_frag, FP64, load, 8>; + def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, load, 4>; + def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, load, 8>; } // Test Data Class. 
Index: llvm/lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1134,7 +1134,7 @@ // Load lengthened. let Uses = [FPC], mayRaiseFPException = 1 in { def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; - def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>; def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>; } let Predicates = [FeatureVectorEnhancements1] in { @@ -1364,32 +1364,32 @@ // Compare scalar. let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; - def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { - def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; - def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>; + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>; } } // Compare and signal scalar. let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; - def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; + def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { - def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>; - def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>; + def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>; + def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>; } } // Compare equal. 
let Uses = [FPC], mayRaiseFPException = 1 in { def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; - defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, v128f, v128sb, 2, 0>; defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1401,11 +1401,11 @@ // Compare and signal equal. let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureVectorEnhancements1] in { - defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag, + defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, z_strict_vfcmpes, null_frag, v128g, v128db, 3, 4>; defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 12>; - defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag, + defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, z_strict_vfcmpes, null_frag, v128f, v128sb, 2, 4>; defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 12>; @@ -1416,12 +1416,12 @@ // Compare high. 
let Uses = [FPC], mayRaiseFPException = 1 in { def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; - defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_any_vfcmph, z_vfcmphs, v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_any_vfcmph, z_vfcmphs, v128f, v128sb, 2, 0>; defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1433,11 +1433,11 @@ // Compare and signal high. let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureVectorEnhancements1] in { - defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag, + defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, z_strict_vfcmphs, null_frag, v128g, v128db, 3, 4>; defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 12>; - defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag, + defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, z_strict_vfcmphs, null_frag, v128f, v128sb, 2, 4>; defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 12>; @@ -1448,12 +1448,12 @@ // Compare high or equal. 
let Uses = [FPC], mayRaiseFPException = 1 in { def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; - defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, v128f, v128sb, 2, 0>; defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1465,11 +1465,11 @@ // Compare and signal high or equal. let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureVectorEnhancements1] in { - defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag, + defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, z_strict_vfcmphes, null_frag, v128g, v128db, 3, 4>; defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 12>; - defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag, + defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, z_strict_vfcmphes, null_frag, v128f, v128sb, 2, 4>; defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 12>; Index: llvm/lib/Target/SystemZ/SystemZOperators.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZOperators.td +++ llvm/lib/Target/SystemZ/SystemZOperators.td @@ -258,6 +258,10 @@ def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>; def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; +def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, + [SDNPHasChain]>; +def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, + [SDNPHasChain]>; def z_tm : SDNode<"SystemZISD::TM", 
SDT_ZICmp>; def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain]>; @@ -328,12 +332,26 @@ def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>; def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>; def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; +def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; +def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; +def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; +def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; +def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; @@ -707,6 +725,23 @@ // Floating-point negative absolute. def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; +// Strict floating-point fragments. 
+def z_any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_fcmp node:$lhs, node:$rhs), + (z_fcmp node:$lhs, node:$rhs)]>; +def z_any_vfcmpe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmpe node:$lhs, node:$rhs), + (z_vfcmpe node:$lhs, node:$rhs)]>; +def z_any_vfcmph : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmph node:$lhs, node:$rhs), + (z_vfcmph node:$lhs, node:$rhs)]>; +def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmphe node:$lhs, node:$rhs), + (z_vfcmphe node:$lhs, node:$rhs)]>; +def z_any_vextend : PatFrags<(ops node:$src), + [(z_strict_vextend node:$src), + (z_vextend node:$src)]>; + // Create a unary operator that loads from memory and then performs // the given operation on it. class loadu Index: llvm/lib/Target/SystemZ/SystemZPatterns.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZPatterns.td +++ llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -148,9 +148,9 @@ // registers in CLS against zero. The instruction has separate R1 and R2 // operands, but they must be the same when the instruction is used like this. multiclass CompareZeroFP { - def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; + def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; // The sign of the zero makes no difference. 
- def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; + def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; } // Use INSN for performing binary operation OPERATION of type VT Index: llvm/lib/Target/SystemZ/SystemZShortenInst.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -313,6 +313,14 @@ Changed |= shortenOn01(MI, SystemZ::CEBR); break; + case SystemZ::WFKDB: + Changed |= shortenOn01(MI, SystemZ::KDBR); + break; + + case SystemZ::WFKSB: + Changed |= shortenOn01(MI, SystemZ::KEBR); + break; + case SystemZ::VL32: // For z13 we prefer LDE over LE to avoid partial register dependencies. Changed |= shortenOn0(MI, SystemZ::LDE32); Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll @@ -0,0 +1,435 @@ +; Test 32-bit floating-point strict comparison. The tests assume a z10 +; implementation of select, using conditional branches rather than LOCGR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare float @foo() + +; Check comparison with registers. +define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) #0 { +; CHECK-LABEL: f1: +; CHECK: cebr %f0, %f2 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the CEB range. 
+define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned CEB range. +define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f3: +; CHECK: ceb %f0, 4092(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f4: +; CHECK: aghi %r4, 4096 +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. 
+define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f5: +; CHECK: aghi %r4, -4 +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that CEB allows indices. +define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) #0 { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r5, 2 +; CHECK: ceb %f0, 400(%r1,%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that comparisons of spilled values can use CEB rather than CEBR. 
+define float @f7(float *%ptr0) #0 { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() #0 + + %cmp0 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp1 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val1, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp2 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp3 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val3, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp4 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val4, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp5 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val5, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp6 = call i1 @llvm.experimental.constrained.fcmp.f32( + float 
%ret, float %val6, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp7 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val7, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp8 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val8, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp9 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val9, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp10 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val10, + metadata !"olt", + metadata !"fpexcept.strict") #0 + + %sel0 = select i1 %cmp0, float %ret, float 0.0 + %sel1 = select i1 %cmp1, float %sel0, float 1.0 + %sel2 = select i1 %cmp2, float %sel1, float 2.0 + %sel3 = select i1 %cmp3, float %sel2, float 3.0 + %sel4 = select i1 %cmp4, float %sel3, float 4.0 + %sel5 = select i1 %cmp5, float %sel4, float 5.0 + %sel6 = select i1 %cmp6, float %sel5, float 6.0 + %sel7 = select i1 %cmp7, float %sel6, float 7.0 + %sel8 = select i1 %cmp8, float %sel7, float 8.0 + %sel9 = select i1 %cmp9, float %sel8, float 9.0 + %sel10 = select i1 %cmp10, float %sel9, float 10.0 + + ret float %sel10 +} + +; Check comparison with zero. +define i64 @f8(i64 %a, i64 %b, float %f) #0 { +; CHECK-LABEL: f8: +; CHECK: ltebr %f0, %f0 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the comparison can be reversed if that allows CEB to be used, +; first with oeq. 
+define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f9: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then one. +define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f10: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: blhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then olt. +define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f11: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ole. +define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f12: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnhe %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then oge. 
+define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f13: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnle %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ogt. +define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f14: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: blr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ueq. +define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f15: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnlhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrlh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then une. +define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f16: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bner %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgre %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ult. 
+define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f17: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrle %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ule. +define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f18: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnlr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrl %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then uge. +define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f19: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ugt. 
+define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f20: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrhe %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll @@ -0,0 +1,249 @@ +; Test 64-bit floating-point strict comparison. The tests assume a z10 +; implementation of select, using conditional branches rather than LOCGR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs\ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare double @foo() + +; Check comparison with registers. +define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) #0 { +; CHECK-LABEL: f1: +; CHECK: cdbr %f0, %f2 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the CDB range. 
+define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: cdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned CDB range. +define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f3: +; CHECK: cdb %f0, 4088(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f4: +; CHECK: aghi %r4, 4096 +; CHECK: cdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. 
+define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f5: +; CHECK: aghi %r4, -8 +; CHECK: cdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that CDB allows indices. +define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) #0 { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r5, 3 +; CHECK: cdb %f0, 800(%r1,%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that comparisons of spilled values can use CDB rather than CDBR. 
+define double @f7(double *%ptr0) #0 { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %ret = call double @foo() #0 + + %cmp0 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp1 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val1, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp2 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp3 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val3, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp4 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val4, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp5 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val5, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp6 = call i1 
@llvm.experimental.constrained.fcmp.f64( + double %ret, double %val6, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp7 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val7, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp8 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val8, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp9 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val9, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp10 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val10, + metadata !"olt", + metadata !"fpexcept.strict") #0 + + %sel0 = select i1 %cmp0, double %ret, double 0.0 + %sel1 = select i1 %cmp1, double %sel0, double 1.0 + %sel2 = select i1 %cmp2, double %sel1, double 2.0 + %sel3 = select i1 %cmp3, double %sel2, double 3.0 + %sel4 = select i1 %cmp4, double %sel3, double 4.0 + %sel5 = select i1 %cmp5, double %sel4, double 5.0 + %sel6 = select i1 %cmp6, double %sel5, double 6.0 + %sel7 = select i1 %cmp7, double %sel6, double 7.0 + %sel8 = select i1 %cmp8, double %sel7, double 8.0 + %sel9 = select i1 %cmp9, double %sel8, double 9.0 + %sel10 = select i1 %cmp10, double %sel9, double 10.0 + + ret double %sel10 +} + +; Check comparison with zero. 
+define i64 @f8(i64 %a, i64 %b, double %f) #0 {
+; CHECK-LABEL: f8:
+; CHECK-SCALAR: ltdbr %f0, %f0
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR: ltdbr %f0, %f0
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+  %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+                                               double %f, double 0.0,
+                                               metadata !"oeq",
+                                               metadata !"fpexcept.strict") #0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CDB to be used.
+define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) #0 {
+; CHECK-LABEL: f9:
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
+; CHECK: br %r14
+  %f1 = load double, double *%ptr
+  %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+                                               double %f1, double %f2,
+                                               metadata !"ogt",
+                                               metadata !"fpexcept.strict") #0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+
Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll
@@ -0,0 +1,47 @@
+; Test 128-bit floating-point strict comparison. The tests assume a z10
+; implementation of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; There is no memory form of 128-bit comparison.
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: lxebr %f0, %f0 +; CHECK-DAG: ld %f1, 0(%r4) +; CHECK-DAG: ld %f3, 8(%r4) +; CHECK: cxbr %f1, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f2x = fpext float %f2 to fp128 + %f1 = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2x, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check comparison with zero. +define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: ld %f0, 0(%r4) +; CHECK: ld %f2, 8(%r4) +; CHECK: ltxbr %f0, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f, fp128 0xL00000000000000000000000000000000, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll @@ -0,0 +1,524 @@ +; Test that floating-point strict compares are omitted if CC already has the +; right value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: -enable-misched=0 -no-integrated-as | FileCheck %s +; +; We need -enable-misched=0 to make sure f12 and following routines really +; test the compare elimination pass. + + +declare float @llvm.fabs.f32(float %f) + +; Test addition followed by EQ, which can use the CC result of the addition. 
+define float @f1(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f1: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with LT. +define float @f2(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f2: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with GT. +define float @f3(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f3: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: bhr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with UEQ. 
+define float @f4(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f4:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: bnlhr %r14
+; CHECK: br %r14
+entry:
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %a, float %b,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+                                               float %res, float 0.0,
+                                               metadata !"ueq",
+                                               metadata !"fpexcept.strict") #0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Subtraction also provides a zero-based CC value.
+define float @f5(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f5:
+; CHECK: seb %f0, 0(%r2)
+; CHECK-NEXT: bnher %r14
+; CHECK: br %r14
+entry:
+  %cur = load float, float *%dest
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %a, float %cur,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+                                               float %res, float 0.0,
+                                               metadata !"ult",
+                                               metadata !"fpexcept.strict") #0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test the result of LOAD POSITIVE. We cannot omit the LTEBR.
+define float @f6(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f6:
+; CHECK: lpdfr %f0, %f2
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: bhr %r14
+; CHECK: br %r14
+entry:
+  %res = call float @llvm.fabs.f32(float %a)
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+                                               float %res, float 0.0,
+                                               metadata !"ogt",
+                                               metadata !"fpexcept.strict") #0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %res, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test the result of LOAD NEGATIVE. We cannot omit the LTEBR.
+define float @f7(float %dummy, float %a, float *%dest) #0 { +; CHECK-LABEL: f7: +; CHECK: lndfr %f0, %f2 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %abs = call float @llvm.fabs.f32(float %a) + %res = fneg float %abs + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD COMPLEMENT. We cannot omit the LTEBR. +define float @f8(float %dummy, float %a, float *%dest) #0 { +; CHECK-LABEL: f8: +; CHECK: lcdfr %f0, %f2 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: bler %r14 +; CHECK: br %r14 +entry: + %res = fneg float %a + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"ole", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Multiplication (for example) does not modify CC. +define float @f9(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f9: +; CHECK: meebr %f0, %f2 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: blhr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fmul.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"one", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test a combination involving a CC-setting instruction followed by +; a non-CC-setting instruction. 
+define float @f10(float %a, float %b, float %c, float *%dest) #0 { +; CHECK-LABEL: f10: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: debr %f0, %f4 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: bner %r14 +; CHECK: br %r14 +entry: + %add = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %add, float %c, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"une", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test a case where CC is set based on a different register from the +; compare input. +define float @f11(float %a, float %b, float %c, float *%dest1, float *%dest2) #0 { +; CHECK-LABEL: f11: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: sebr %f4, %f0 +; CHECK-DAG: ste %f4, 0(%r2) +; CHECK-DAG: ltebr %f0, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 +entry: + %add = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %sub = call float @llvm.experimental.constrained.fsub.f32( + float %c, float %add, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %sub, float *%dest1 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %add, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %sub, float *%dest2 + br label %exit + +exit: + ret float %add +} + +; Test that LER gets converted to LTEBR where useful. 
+define float @f12(float %dummy, float %val) #0 { +; CHECK-LABEL: f12: +; CHECK: ltebr %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %ret = call float asm "blah $1", "=f,{f0}"(float %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %val, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret float %ret +} + +; Test that LDR gets converted to LTDBR where useful. +define double @f13(double %dummy, double %val) #0 { +; CHECK-LABEL: f13: +; CHECK: ltdbr %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %ret = call double asm "blah $1", "=f,{f0}"(double %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f64( + double %val, double 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret double %ret +} + +; Test that LXR gets converted to LTXBR where useful. 
+define void @f14(fp128 *%ptr1, fp128 *%ptr2) #0 { +; CHECK-LABEL: f14: +; CHECK: ltxbr +; CHECK-NEXT: dxbr +; CHECK-NEXT: std +; CHECK-NEXT: std +; CHECK-NEXT: mxbr +; CHECK-NEXT: std +; CHECK-NEXT: std +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %val1 = load fp128, fp128 *%ptr1 + %val2 = load fp128, fp128 *%ptr2 + %div = fdiv fp128 %val1, %val2 + store fp128 %div, fp128 *%ptr1 + %mul = fmul fp128 %val1, %val2 + store fp128 %mul, fp128 *%ptr2 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %val1, fp128 0xL00000000000000000000000000000000, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret void +} + +; Test a case where it is the source rather than destination of LER that +; we need. +define float @f15(float %val, float %dummy) #0 { +; CHECK-LABEL: f15: +; CHECK: ltebr %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %ret = call float asm "blah $1", "=f,{f2}"(float %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %val, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret float %ret +} + +; Test a case where it is the source rather than destination of LDR that +; we need. 
+define double @f16(double %val, double %dummy) #0 { +; CHECK-LABEL: f16: +; CHECK: ltdbr %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %ret = call double asm "blah $1", "=f,{f2}"(double %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f64( + double %val, double 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret double %ret +} + +; Repeat f2 with a comparison against -0. +define float @f17(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f17: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float -0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Verify that we cannot omit the compare if there may be an intervening +; change to the exception flags. 
+define float @f18(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f18: +; CHECK: aebr %f0, %f2 +; CHECK: ltebr %f0, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + call void asm sideeffect "blah", ""() + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Verify that we cannot convert LER to LTEBR and omit the compare if +; there may be an intervening change to the exception flags. +define float @f19(float %dummy, float %val) #0 { +; CHECK-LABEL: f19: +; CHECK: ler %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ltebr %f2, %f2 +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %ret = call float asm sideeffect "blah $1", "=f,{f0}"(float %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %val, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret float %ret +} + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) Index: 
llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll @@ -0,0 +1,103 @@ +; Test that floating-point instructions that set cc are *not* used to +; eliminate *strict* compares for load complement, load negative and load +; positive +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Load complement (sign-bit flipped). +; Test f32 +define float @f1(float %a, float %b, float %f) #0 { +; CHECK-LABEL: f1: +; CHECK: ltebr +; CHECK-NEXT: ber %r14 + %neg = fneg float %f + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %neg, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, float %a, float %b + ret float %res +} + +; Test f64 +define double @f2(double %a, double %b, double %f) #0 { +; CHECK-LABEL: f2: +; CHECK: ltdbr +; CHECK-NEXT: ber %r14 + %neg = fneg double %f + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %neg, double 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Negation of floating-point absolute. 
+; Test f32 +declare float @llvm.fabs.f32(float %f) +define float @f3(float %a, float %b, float %f) #0 { +; CHECK-LABEL: f3: +; CHECK: ltebr +; CHECK-NEXT: ber %r14 + %abs = call float @llvm.fabs.f32(float %f) + %neg = fneg float %abs + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %neg, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, float %a, float %b + ret float %res +} + +; Test f64 +declare double @llvm.fabs.f64(double %f) +define double @f4(double %a, double %b, double %f) #0 { +; CHECK-LABEL: f4: +; CHECK: ltdbr +; CHECK-NEXT: ber %r14 + %abs = call double @llvm.fabs.f64(double %f) + %neg = fneg double %abs + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %neg, double 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Absolute floating-point value. +; Test f32 +define float @f5(float %a, float %b, float %f) #0 { +; CHECK-LABEL: f5: +; CHECK: ltebr +; CHECK-NEXT: ber %r14 + %abs = call float @llvm.fabs.f32(float %f) + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %abs, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, float %a, float %b + ret float %res +} + +; Test f64 +define double @f6(double %a, double %b, double %f) #0 { +; CHECK-LABEL: f6: +; CHECK: ltdbr +; CHECK-NEXT: ber %r14 + %abs = call double @llvm.fabs.f64(double %f) + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %abs, double 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll 
=================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll @@ -0,0 +1,44 @@ +; Test f128 strict comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; There is no memory form of 128-bit comparison. +define i64 @f1(i64 %a, i64 %b, fp128 *%ptr1, fp128 *%ptr2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r5) +; CHECK: wfcxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check comparison with zero -- it is not worthwhile to copy to +; FP pairs just so we can use LTXBR, so simply load up a zero. +define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfcxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f, fp128 0xL00000000000000000000000000000000, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll @@ -0,0 +1,436 @@ +; Test 32-bit floating-point signaling comparison. The tests assume a z10 +; implementation of select, using conditional branches rather than LOCGR. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare float @foo() + +; Check comparison with registers. +define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) #0 { +; CHECK-LABEL: f1: +; CHECK: kebr %f0, %f2 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the KEB range. +define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned KEB range. +define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f3: +; CHECK: keb %f0, 4092(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f4: +; CHECK: aghi %r4, 4096 +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. +define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f5: +; CHECK: aghi %r4, -4 +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that KEB allows indices. +define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) #0 { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r5, 2 +; CHECK: keb %f0, 400(%r1,%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that comparisons of spilled values can use KEB rather than KEBR. 
+define float @f7(float *%ptr0) #0 { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: keb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() #0 + + %cmp0 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp1 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val1, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp2 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp3 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val3, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp4 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val4, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp5 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val5, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp6 = call i1 @llvm.experimental.constrained.fcmps.f32( + 
float %ret, float %val6, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp7 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val7, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp8 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val8, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp9 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val9, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp10 = call i1 @llvm.experimental.constrained.fcmps.f32( + float %ret, float %val10, + metadata !"olt", + metadata !"fpexcept.strict") #0 + + %sel0 = select i1 %cmp0, float %ret, float 0.0 + %sel1 = select i1 %cmp1, float %sel0, float 1.0 + %sel2 = select i1 %cmp2, float %sel1, float 2.0 + %sel3 = select i1 %cmp3, float %sel2, float 3.0 + %sel4 = select i1 %cmp4, float %sel3, float 4.0 + %sel5 = select i1 %cmp5, float %sel4, float 5.0 + %sel6 = select i1 %cmp6, float %sel5, float 6.0 + %sel7 = select i1 %cmp7, float %sel6, float 7.0 + %sel8 = select i1 %cmp8, float %sel7, float 8.0 + %sel9 = select i1 %cmp9, float %sel8, float 9.0 + %sel10 = select i1 %cmp10, float %sel9, float 10.0 + + ret float %sel10 +} + +; Check comparison with zero - cannot use LOAD AND TEST. +define i64 @f8(i64 %a, i64 %b, float %f) #0 { +; CHECK-LABEL: f8: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK-NEXT: kebr %f0, [[REG]] +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the comparison can be reversed if that allows KEB to be used, +; first with oeq. 
+define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f9: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then one. +define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f10: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: blhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then olt. +define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f11: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ole. +define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f12: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnhe %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then oge. 
+define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f13: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnle %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ogt. +define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f14: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: blr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ueq. +define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f15: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnlhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrlh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then une. +define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f16: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bner %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgre %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ult. 
+define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f17: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrle %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ule. +define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f18: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnlr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrl %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then uge. +define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f19: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ugt. 
+define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f20: +; CHECK: keb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrhe %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) Index: llvm/test/CodeGen/SystemZ/fp-strict-cmps-02.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmps-02.ll @@ -0,0 +1,249 @@ +; Test 64-bit floating-point signaling comparison. The tests assume a z10 +; implementation of select, using conditional branches rather than LOCGR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs\ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare double @foo() + +; Check comparison with registers. +define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) #0 { +; CHECK-LABEL: f1: +; CHECK: kdbr %f0, %f2 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the KDB range. 
+define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: kdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned KDB range. +define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f3: +; CHECK: kdb %f0, 4088(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f4: +; CHECK: aghi %r4, 4096 +; CHECK: kdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. 
+define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f5: +; CHECK: aghi %r4, -8 +; CHECK: kdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that KDB allows indices. +define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) #0 { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r5, 3 +; CHECK: kdb %f0, 800(%r1,%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that comparisons of spilled values can use KDB rather than KDBR. 
+define double @f7(double *%ptr0) #0 { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: kdb {{%f[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %ret = call double @foo() #0 + + %cmp0 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp1 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val1, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp2 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp3 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val3, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp4 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val4, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp5 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val5, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp6 = 
call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val6, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp7 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val7, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp8 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val8, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp9 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val9, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp10 = call i1 @llvm.experimental.constrained.fcmps.f64( + double %ret, double %val10, + metadata !"olt", + metadata !"fpexcept.strict") #0 + + %sel0 = select i1 %cmp0, double %ret, double 0.0 + %sel1 = select i1 %cmp1, double %sel0, double 1.0 + %sel2 = select i1 %cmp2, double %sel1, double 2.0 + %sel3 = select i1 %cmp3, double %sel2, double 3.0 + %sel4 = select i1 %cmp4, double %sel3, double 4.0 + %sel5 = select i1 %cmp5, double %sel4, double 5.0 + %sel6 = select i1 %cmp6, double %sel5, double 6.0 + %sel7 = select i1 %cmp7, double %sel6, double 7.0 + %sel8 = select i1 %cmp8, double %sel7, double 8.0 + %sel9 = select i1 %cmp9, double %sel8, double 9.0 + %sel10 = select i1 %cmp10, double %sel9, double 10.0 + + ret double %sel10 +} + +; Check comparison with zero - cannot use LOAD AND TEST. 
+define i64 @f8(i64 %a, i64 %b, double %f) #0 { +; CHECK-LABEL: f8: +; CHECK: lzdr [[REG:%f[0-9]+]] +; CHECK-NEXT: kdbr %f0, [[REG]] +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f, double 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the comparison can be reversed if that allows KDB to be used. +define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) #0 { +; CHECK-LABEL: f9: +; CHECK: kdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: blr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 +; CHECK: br %r14 + %f1 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/fp-strict-cmps-03.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmps-03.ll @@ -0,0 +1,48 @@ +; Test 128-bit floating-point signaling comparison. The tests assume a z10 +; implementation of select, using conditional branches rather than LOCGR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; There is no memory form of 128-bit comparison. 
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: lxebr %f0, %f0 +; CHECK-DAG: ld %f1, 0(%r4) +; CHECK-DAG: ld %f3, 8(%r4) +; CHECK: kxbr %f1, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f2x = fpext float %f2 to fp128 + %f1 = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f128( + fp128 %f1, fp128 %f2x, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check comparison with zero - cannot use LOAD AND TEST. +define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: ld %f0, 0(%r4) +; CHECK-DAG: ld %f2, 8(%r4) +; CHECK-DAG: lzxr [[REG:%f[0-9]+]] +; CHECK-NEXT: kxbr %f0, [[REG]] +; CHECK-NEXT: ber %r14 +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f128( + fp128 %f, fp128 0xL00000000000000000000000000000000, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll @@ -0,0 +1,148 @@ +; Verify that floating-point strict signaling compares cannot be omitted +; even if CC already has the right value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: -enable-misched=0 -no-integrated-as | FileCheck %s +; +; We need -enable-misched=0 to make sure f12 and following routines really +; test the compare elimination pass. + + +declare float @llvm.fabs.f32(float %f) + +; Test addition followed by EQ, which could use the CC result of the addition. 
+define float @f1(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: aebr %f0, %f2 +; CHECK-DAG: lzer [[REG:%f[0-9]+]] +; CHECK-NEXT: kebr %f0, [[REG]] +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmps.f32( + float %res, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD POSITIVE. +define float @f6(float %dummy, float %a, float *%dest) #0 { +; CHECK-LABEL: f6: +; CHECK-DAG: lpdfr %f0, %f2 +; CHECK-DAG: lzer [[REG:%f[0-9]+]] +; CHECK-NEXT: kebr %f0, [[REG]] +; CHECK-NEXT: bhr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.fabs.f32(float %a) + %cmp = call i1 @llvm.experimental.constrained.fcmps.f32( + float %res, float 0.0, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD NEGATIVE. +define float @f7(float %dummy, float %a, float *%dest) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: lndfr %f0, %f2 +; CHECK-DAG: lzer [[REG:%f[0-9]+]] +; CHECK-NEXT: kebr %f0, [[REG]] +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %abs = call float @llvm.fabs.f32(float %a) + %res = fneg float %abs + %cmp = call i1 @llvm.experimental.constrained.fcmps.f32( + float %res, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD COMPLEMENT. 
+define float @f8(float %dummy, float %a, float *%dest) #0 { +; CHECK-LABEL: f8: +; CHECK-DAG: lcdfr %f0, %f2 +; CHECK-DAG: lzer [[REG:%f[0-9]+]] +; CHECK-NEXT: kebr %f0, [[REG]] +; CHECK-NEXT: bler %r14 +; CHECK: br %r14 +entry: + %res = fneg float %a + %cmp = call i1 @llvm.experimental.constrained.fcmps.f32( + float %res, float 0.0, + metadata !"ole", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test that LER does not get converted to LTEBR. +define float @f12(float %dummy, float %val) #0 { +; CHECK-LABEL: f12: +; CHECK: ler %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lzer [[REG:%f[0-9]+]] +; CHECK-NEXT: kebr %f2, [[REG]] +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %ret = call float asm "blah $1", "=f,{f0}"(float %val) + %cmp = call i1 @llvm.experimental.constrained.fcmps.f32( + float %val, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret float %ret +} + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata) Index: llvm/test/CodeGen/SystemZ/fp-strict-cmps-05.ll =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/SystemZ/fp-strict-cmps-05.ll @@ -0,0 +1,103 @@ +; Test that floating-point instructions that set cc are *not* used to +; eliminate *strict* signaling compares for load complement, load negative +; and load positive +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Load complement (sign-bit flipped). +; Test f32 +define float @f1(float %a, float %b, float %f) #0 { +; CHECK-LABEL: f1: +; CHECK: kebr +; CHECK-NEXT: ber %r14 + %neg = fneg float %f + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %neg, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, float %a, float %b + ret float %res +} + +; Test f64 +define double @f2(double %a, double %b, double %f) #0 { +; CHECK-LABEL: f2: +; CHECK: kdbr +; CHECK-NEXT: ber %r14 + %neg = fneg double %f + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %neg, double 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Negation of floating-point absolute. 
+; Test f32 +declare float @llvm.fabs.f32(float %f) +define float @f3(float %a, float %b, float %f) #0 { +; CHECK-LABEL: f3: +; CHECK: kebr +; CHECK-NEXT: ber %r14 + %abs = call float @llvm.fabs.f32(float %f) + %neg = fneg float %abs + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %neg, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, float %a, float %b + ret float %res +} + +; Test f64 +declare double @llvm.fabs.f64(double %f) +define double @f4(double %a, double %b, double %f) #0 { +; CHECK-LABEL: f4: +; CHECK: kdbr +; CHECK-NEXT: ber %r14 + %abs = call double @llvm.fabs.f64(double %f) + %neg = fneg double %abs + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %neg, double 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Absolute floating-point value. +; Test f32 +define float @f5(float %a, float %b, float %f) #0 { +; CHECK-LABEL: f5: +; CHECK: kebr +; CHECK-NEXT: ber %r14 + %abs = call float @llvm.fabs.f32(float %f) + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %abs, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, float %a, float %b + ret float %res +} + +; Test f64 +define double @f6(double %a, double %b, double %f) #0 { +; CHECK-LABEL: f6: +; CHECK: kdbr +; CHECK-NEXT: ber %r14 + %abs = call double @llvm.fabs.f64(double %f) + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %abs, double 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/fp-strict-cmps-06.ll 
=================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmps-06.ll @@ -0,0 +1,44 @@ +; Test f128 signaling comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; There is no memory form of 128-bit comparison: both operands go through vl. +define i64 @f1(i64 %a, i64 %b, fp128 *%ptr1, fp128 *%ptr2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r5) +; CHECK: wfkxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmps.f128( + fp128 %f1, fp128 %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check comparison with zero -- it is not worthwhile to copy to +; FP pairs just so we can use LTXBR, so simply load up a zero (vzero). +define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfkxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmps.f128( + fp128 %f, fp128 0xL00000000000000000000000000000000, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/vec-strict-cmp-01.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmp-01.ll @@ -0,0 +1,560 @@ +; Test strict v4f32 comparisons; the checks pass each half through vldeb first. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test oeq: vfcedb on both halves, repacked with vpkg.
+define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test one: vfchdb in both operand orders, combined with vo. +define <4 x i32> @f2(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vo %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata
!"one", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ogt: vfchdb with the 0D halves first and the 1D halves second. +define <4 x i32> @f3(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oge: like ogt, but using vfchedb. +define <4 x i32> @f4(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ole: like oge, but with the vfchedb operands reversed.
+define <4 x i32> @f5(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test olt: like ogt, but with the vfchdb operands reversed. +define <4 x i32> @f6(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ueq: vfchdb in both operand orders, combined with vno.
+define <4 x i32> @f7(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vno %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test une: vfcedb as for oeq, then vno of the packed result with itself.
+define <4 x i32> @f8(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ugt: the ole compare (reversed vfchedb), then vno of the result with itself. +define <4 x i32> @f9(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uge: the olt compare (reversed vfchdb), then vno of the result with itself.
+define <4 x i32> @f10(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ule: the ogt compare (vfchdb), then vno of the result with itself. +define <4 x i32> @f11(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ult: the oge compare (vfchedb), then vno of the result with itself.
+define <4 x i32> @f12(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ord: vfchedb plus reversed vfchdb, combined with vo. +define <4 x i32> @f13(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vo %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1>
@llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uno: same compares as ord, combined with vno instead of vo. +define <4 x i32> @f14(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vno %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oeq selects: the vpkg result is the vsel mask. +define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test one selects: the vo result is the vsel mask.
+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ogt selects: the vpkg result is the vsel mask. +define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test oge selects: the vpkg result is the vsel mask. +define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f18: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ole selects: the vpkg result is the vsel mask.
+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f19: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test olt selects: the vpkg result is the vsel mask. +define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f20: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ueq selects: vo mask as for one selects, with the vsel value operands swapped. +define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f21: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test une selects: vpkg mask, with the vsel value operands swapped (%v30, %v28).
+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f22: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ugt selects: vpkg mask, with the vsel value operands swapped (%v30, %v28). +define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f23: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uge selects: vpkg mask, with the vsel value operands swapped (%v30, %v28). +define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f24: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ule selects: vpkg mask, with the vsel value operands swapped (%v30, %v28).
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f25: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ult selects: vpkg mask, with the vsel value operands swapped (%v30, %v28). +define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f26: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ord selects: the vo result is the vsel mask. +define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f27: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uno selects: vo mask as for ord selects, with the vsel value operands swapped.
+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f28: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +attributes #0 = { strictfp } + +declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata) Index: llvm/test/CodeGen/SystemZ/vec-strict-cmp-02.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmp-02.ll @@ -0,0 +1,442 @@ +; Test f64 and v2f64 strict comparisons. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test oeq: a single vfcedb. +define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK: vfcedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test one: vfchdb in both operand orders, combined with vo. +define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ogt: a single vfchdb with %v26 before %v28.
+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK: vfchdb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oge: a single vfchedb with %v26 before %v28. +define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK: vfchedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ole: a single vfchedb with %v28 before %v26. +define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK: vfchedb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test olt: a single vfchdb with %v28 before %v26. +define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK: vfchdb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ueq: vfchdb in both operand orders, combined with vno.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test une: the oeq compare (vfcedb), then vno of the result with itself. +define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ugt: the ole compare, then vno of the result with itself. +define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uge: the olt compare, then vno of the result with itself. +define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ule: the ogt compare, then vno of the result with itself.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ult: the oge compare, then vno of the result with itself. +define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ord: vfchdb %v28, %v26 and vfchedb %v26, %v28, combined with vo. +define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uno: same compares as ord, combined with vno instead of vo. +define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oeq selects: the vfcedb result is the vsel mask.
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test one selects: the vo of both vfchdb results is the vsel mask. +define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ogt selects: the vfchdb result is the vsel mask. +define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test oge selects: the vfchedb result is the vsel mask.
+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f18: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ole selects: like oge selects, with the vfchedb operands reversed. +define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f19: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test olt selects: like ogt selects, with the vfchdb operands reversed. +define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f20: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ueq selects: vo mask as for one selects, with the vsel value operands swapped.
+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f21: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test une (unordered or not equal) selects. +define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f22: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ugt (unordered or greater than) selects. +define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f23: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uge (unordered or greater than or equal) selects.
+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f24: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ule (unordered or less than or equal) selects. +define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f25: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ult (unordered or less than) selects. +define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f26: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ord (neither operand is a NaN) selects.
+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f27: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uno (either operand is a NaN) selects. +define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f28: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test a scalar f64 comparison (quiet oeq) that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) #0 { +; CHECK-LABEL: f29: +; CHECK: wfcdb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <2 x double> %vec, i32 0 + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/vec-strict-cmp-03.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmp-03.ll @@ -0,0 +1,442 @@ +; Test strict (quiet) f32 and v4f32 comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test oeq (ordered and equal). +define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK: vfcesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test one (ordered and not equal). +define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ogt (ordered and greater than).
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK: vfchsb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oge (ordered and greater than or equal). +define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK: vfchesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ole (ordered and less than or equal). +define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK: vfchesb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test olt (ordered and less than). +define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK: vfchsb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ueq (unordered or equal).
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test une (unordered or not equal). +define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ugt (unordered or greater than). +define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uge (unordered or greater than or equal). +define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ule (unordered or less than or equal).
+define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ult (unordered or less than). +define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ord (neither operand is a NaN). +define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uno (either operand is a NaN). +define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oeq (ordered and equal) selects.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test one (ordered and not equal) selects. +define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ogt (ordered and greater than) selects. +define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test oge (ordered and greater than or equal) selects.
+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f18: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ole (ordered and less than or equal) selects. +define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f19: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test olt (ordered and less than) selects. +define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f20: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ueq (unordered or equal) selects.
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f21: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test une (unordered or not equal) selects. +define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f22: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ugt (unordered or greater than) selects. +define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f23: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uge (unordered or greater than or equal) selects.
+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f24: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ule (unordered or less than or equal) selects. +define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f25: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ult (unordered or less than) selects. +define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f26: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ord (neither operand is a NaN) selects.
+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f27: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uno (either operand is a NaN) selects. +define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f28: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test a scalar f32 comparison (quiet oeq) that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) #0 { +; CHECK-LABEL: f29: +; CHECK: wfcsb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <4 x float> %vec, i32 0 + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/vec-strict-cmps-01.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmps-01.ll @@ -0,0 +1,442 @@ +; Test signaling f32 and v4f32 comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test oeq (ordered and equal). +define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK: vfkesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test one (ordered and not equal). +define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ogt (ordered and greater than).
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK: vfkhsb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oge (ordered and greater than or equal). +define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK: vfkhesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ole (ordered and less than or equal). +define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK: vfkhesb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test olt (ordered and less than). +define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK: vfkhsb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ueq (unordered or equal).
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test une (unordered or not equal). +define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK: vfkesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ugt (unordered or greater than). +define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK: vfkhesb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uge (unordered or greater than or equal). +define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK: vfkhsb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ule (unordered or less than or equal).
+define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK: vfkhsb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ult (unordered or less than). +define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK: vfkhesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ord (neither operand is a NaN). +define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uno (either operand is a NaN). +define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oeq (ordered and equal) selects.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vfkesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test one (ordered and not equal) selects. +define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ogt (ordered and greater than) selects. +define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vfkhsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test oge (ordered and greater than or equal) selects.
+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f18: +; CHECK: vfkhesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ole (ordered and less than or equal) selects. +define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f19: +; CHECK: vfkhesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test olt (ordered and less than) selects. +define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f20: +; CHECK: vfkhsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ueq (unordered or equal) selects.
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f21: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test une (unordered or not equal) selects. +define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f22: +; CHECK: vfkesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ugt (unordered or greater than) selects. +define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f23: +; CHECK: vfkhesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uge (unordered or greater than or equal) selects.
+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f24: +; CHECK: vfkhsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ule (unordered or less than or equal) selects. +define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f25: +; CHECK: vfkhsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ult (unordered or less than) selects. +define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f26: +; CHECK: vfkhesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ord (neither operand is a NaN) selects.
+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f27: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uno selects. uno is the complement of ord, so the checks expect the same compare pair and vo as f27, with the vsel value operands swapped (%v30 before %v28). +define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f28: +; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test an f32 comparison that uses vector registers: the second operand is element 0 of a vector argument, and the checks expect a scalar wfksb on %f0 and %v24.
+define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) #0 { +; CHECK-LABEL: f29: +; CHECK: wfksb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <4 x float> %vec, i32 0 + %cond = call i1 @llvm.experimental.constrained.fcmps.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/vec-strict-cmps-02.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmps-02.ll @@ -0,0 +1,442 @@ +; Test f64 and v2f64 signaling comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test oeq. The checks expect a single vfkedb on (%v26, %v28) writing %v24 directly. +define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK: vfkedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test one. The checks expect vfkhdb in both operand orders, combined by vo. +define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ogt. The checks expect a single vfkhdb on (%v26, %v28) writing %v24 directly.
+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK: vfkhdb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oge. The checks expect a single vfkhedb on (%v26, %v28) writing %v24 directly. +define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK: vfkhedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ole. The checks expect the same vfkhedb as f4 but with the compare operands reversed (%v28, %v26). +define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK: vfkhedb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test olt. The checks expect the same vfkhdb as f3 but with the compare operands reversed (%v28, %v26). +define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK: vfkhdb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ueq. ueq is the complement of one, so the checks expect the same vfkhdb pair as f2 but combined by vno instead of vo.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test une. une is the complement of oeq, so the checks expect the vfkedb from f1 followed by a vno of the result with itself. +define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK: vfkedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ugt. ugt is the complement of ole, so the checks expect the vfkhedb from f5 followed by a vno of the result with itself. +define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK: vfkhedb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uge. uge is the complement of olt, so the checks expect the vfkhdb from f6 followed by a vno of the result with itself. +define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK: vfkhdb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ule. ule is the complement of ogt, so the checks expect the vfkhdb from f3 followed by a vno of the result with itself.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK: vfkhdb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ult. ult is the complement of oge, so the checks expect the vfkhedb from f4 followed by a vno of the result with itself. +define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK: vfkhedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ord. The checks expect vfkhdb on (%v28, %v26) and vfkhedb on (%v26, %v28), combined by vo. +define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uno. uno is the complement of ord, so the checks expect the same compare pair as f13 but combined by vno instead of vo. +define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oeq selects. The checks expect a single vfkedb on (%v24, %v26) whose result is the mask of a vsel with value operands %v28, %v30.
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vfkedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test one selects. The checks expect vfkhdb in both operand orders, combined by vo, feeding a vsel with value operands %v28, %v30. +define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ogt selects. The checks expect a single vfkhdb on (%v24, %v26) feeding a vsel with value operands %v28, %v30. +define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vfkhdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test oge selects. The checks expect a single vfkhedb on (%v24, %v26) feeding a vsel with value operands %v28, %v30.
+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f18: +; CHECK: vfkhedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ole selects. The checks expect the vfkhedb from f18 with the compare operands reversed (%v26, %v24), feeding a vsel with value operands %v28, %v30. +define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f19: +; CHECK: vfkhedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test olt selects. The checks expect the vfkhdb from f17 with the compare operands reversed (%v26, %v24), feeding a vsel with value operands %v28, %v30. +define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f20: +; CHECK: vfkhdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ueq selects. ueq is the complement of one, so the checks expect the same compares and vo as f16, with the vsel value operands swapped (%v30 before %v28).
+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f21: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test une selects. une is the complement of oeq, so the checks expect the vfkedb from f15 with the vsel value operands swapped (%v30 before %v28). +define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f22: +; CHECK: vfkedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ugt selects. ugt is the complement of ole, so the checks expect the vfkhedb from f19 with the vsel value operands swapped (%v30 before %v28). +define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f23: +; CHECK: vfkhedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uge selects. uge is the complement of olt, so the checks expect the vfkhdb from f20 with the vsel value operands swapped (%v30 before %v28).
+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f24: +; CHECK: vfkhdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ule selects. ule is the complement of ogt, so the checks expect the vfkhdb from f17 with the vsel value operands swapped (%v30 before %v28). +define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f25: +; CHECK: vfkhdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ult selects. ult is the complement of oge, so the checks expect the vfkhedb from f18 with the vsel value operands swapped (%v30 before %v28). +define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f26: +; CHECK: vfkhedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ord selects. The checks expect vfkhdb on (%v26, %v24) and vfkhedb on (%v24, %v26) combined by vo, feeding a vsel with value operands %v28, %v30.
+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f27: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uno selects. uno is the complement of ord, so the checks expect the same compare pair and vo as f27, with the vsel value operands swapped (%v30 before %v28). +define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f28: +; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test an f64 comparison that uses vector registers: the second operand is element 0 of a vector argument, and the checks expect a scalar wfkdb on %f0 and %v24.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) #0 { +; CHECK-LABEL: f29: +; CHECK: wfkdb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <2 x double> %vec, i32 0 + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/vec-strict-cmps-03.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmps-03.ll @@ -0,0 +1,56 @@ +; Test signaling vector floating-point comparisons on z13. +; Note that these must be scalarized on z13 because there are no native vector signaling-compare instructions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test v4f32. The checks expect four scalar kebr compares (presumably one per vector element). +define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK: kebr +; CHECK: kebr +; CHECK: kebr +; CHECK: kebr +; CHECK: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test v2f64. The checks expect two scalar compares, each of which may be either kdbr or wfkdb. +define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK: {{kdbr|wfkdb}} +; CHECK: {{kdbr|wfkdb}} +; CHECK: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test an f64 comparison that uses vector registers: as on z14, the checks expect a single scalar wfkdb on %f0 and %v24.
+define i64 @f3(i64 %a, i64 %b, double %f1, <2 x double> %vec) #0 { +; CHECK-LABEL: f3: +; CHECK: wfkdb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <2 x double> %vec, i32 0 ; the second compare operand is element 0 of the vector argument + %cond = call i1 @llvm.experimental.constrained.fcmps.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } ; #0 is attached to every function and constrained call in this file + +declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) +