Index: llvm/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -330,6 +330,10 @@ /// It is used to limit optimizations while the DAG is being optimized. STRICT_FP_EXTEND, + /// STRICT_FSETCC - Constrained version of SETCC, used for floating-point + /// operands only. + STRICT_FSETCC, + /// FMA - Perform a * b + c with no intermediate rounding step. FMA, Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -722,6 +722,7 @@ case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FSETCC: return true; } } Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -976,6 +976,7 @@ case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FSETCC: EqOpc = ISD::SETCC; break; } return getOperationAction(EqOpc, VT); Index: llvm/include/llvm/IR/IntrinsicInst.h =================================================================== --- llvm/include/llvm/IR/IntrinsicInst.h +++ llvm/include/llvm/IR/IntrinsicInst.h @@ -249,6 +249,25 @@ case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: + case Intrinsic::experimental_constrained_fcmp: + return true; + default: return false; + } + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + }; + + /// Constrained floating point compare intrinsics. 
+ class ConstrainedFPCmpIntrinsic : public ConstrainedFPIntrinsic { + public: + FCmpInst::Predicate getPredicate() const; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::experimental_constrained_fcmp: return true; default: return false; } Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -743,6 +743,13 @@ [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + + // Constrained floating-point comparison. This takes the predicate + // as third operand in the form of a metadata string. + def int_experimental_constrained_fcmp + : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + [ llvm_anyfloat_ty, LLVMMatchType<0>, llvm_metadata_ty, + llvm_metadata_ty ]>; } // FIXME: Add intrinsic for fcmp. // FIXME: Consider maybe adding intrinsics for sitofp, uitofp. Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9613,6 +9613,41 @@ } } + // Some of the transformations above are also valid for STRICT_FSETCC. + if (N0.getOpcode() == ISD::STRICT_FSETCC) { + SDValue Chain = N0.getOperand(0); + SDValue N00 = N0.getOperand(1); + SDValue N01 = N0.getOperand(2); + SDValue N02 = N0.getOperand(3); + EVT N00VT = N0.getOperand(1).getValueType(); + + // sext(strict_fsetcc) -> sext_in_reg(strict_fsetcc) for vectors. 
+ if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(N00VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + EVT SVT = getSetCCResultType(N00VT); + + if (SVT != N0.getValueType()) { + if (VT.getSizeInBits() == SVT.getSizeInBits()) { + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + SDValue VSetCC = DAG.getNode(ISD::STRICT_FSETCC, DL, VTs, + Chain, N00, N01, N02); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), VSetCC.getValue(1)); + return VSetCC; + } + + EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); + if (SVT == MatchingVecType) { + SDVTList VTs = DAG.getVTList(MatchingVecType, MVT::Other); + SDValue VSetCC = DAG.getNode(ISD::STRICT_FSETCC, DL, VTs, + Chain, N00, N01, N02); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), VSetCC.getValue(1)); + return DAG.getSExtOrTrunc(VSetCC, DL, VT); + } + } + } + } + // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && DAG.SignBitIsZero(N0)) Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1036,11 +1036,14 @@ Node->getOperand(2).getValueType()); break; case ISD::SELECT_CC: + case ISD::STRICT_FSETCC: case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 3 : Node->getOpcode() == ISD::SETCC ? 2 : 1; - unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 
1 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast(Node->getOperand(CCOperand))->get(); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -75,6 +75,7 @@ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; @@ -816,7 +817,9 @@ } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT InVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + int InOpNo = IsStrict? 1 : 0; + EVT InVT = N->getOperand(InOpNo).getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT SVT = getSetCCResultType(InVT); @@ -835,12 +838,22 @@ } SDLoc dl(N); - assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + assert(SVT.isVector() == N->getOperand(InOpNo).getValueType().isVector() && "Vector compare must return a vector result!"); // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC; + if (IsStrict) { + EVT VTs[] = {SVT, MVT::Other}; + SDValue Opers[] = {N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)}; + SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1)); + } else + SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. return DAG.getSExtOrTrunc(SetCC, dl, NVT); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -338,6 +338,7 @@ case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FSETCC: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); // If we're asked to expand a strict vector floating-point operation, // by default we're going to simply unroll it. That is usually the @@ -864,6 +865,7 @@ case ISD::STRICT_FTRUNC: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FSETCC: return ExpandStrictFPOp(Op); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1371,7 +1373,13 @@ unsigned NumElems = VT.getVectorNumElements(); unsigned NumOpers = Op.getNumOperands(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ValueVTs[] = {EltVT, MVT::Other}; + + EVT TmpEltVT = EltVT; + if (Op->getOpcode() == ISD::STRICT_FSETCC) + TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT); + + EVT ValueVTs[] = {TmpEltVT, MVT::Other}; SDValue Chain = Op.getOperand(0); SDLoc dl(Op); @@ -1398,9 +1406,17 @@ } SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarResult = ScalarOp.getValue(0); + SDValue ScalarChain = ScalarOp.getValue(1); + + if (Op->getOpcode() == ISD::STRICT_FSETCC) + ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); - OpValues.push_back(ScalarOp.getValue(0)); - OpChains.push_back(ScalarOp.getValue(1)); + 
 OpValues.push_back(ScalarResult); OpChains.push_back(ScalarChain); } SDValue Result = DAG.getBuildVector(VT, dl, OpValues); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -174,6 +174,7 @@ case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FSETCC: R = ScalarizeVecRes_StrictFPOp(N); break; case ISD::UADDO: @@ -989,6 +990,7 @@ case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FSETCC: SplitVecRes_StrictFPOp(N, Lo, Hi); break; case ISD::UADDO: @@ -2799,6 +2801,7 @@ case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FSETCC: Res = WidenVecRes_StrictFP(N); break; @@ -3799,6 +3802,16 @@ WidenVT, N->getOperand(0)); } +// Return true if this is a SETCC node or a strict version of it. +static inline bool isSETCCOp(unsigned Opcode) { + switch (Opcode) { + case ISD::SETCC: + case ISD::STRICT_FSETCC: + return true; + } + return false; +} + // Return true if this is a node that could have two SETCCs as operands. static inline bool isLogicalMaskOp(unsigned Opcode) { switch (Opcode) { @@ -3810,6 +3823,15 @@ return false; } +// If N is a SETCC or a strict variant of it, return the type +// of the compare operands. +static inline EVT getSETCCOperandType(SDValue N) { + if (N->isStrictFPOpcode()) + return N->getOperand(1).getValueType(); + else + return N->getOperand(0).getValueType(); +} + // This is used just for the assert in convertMask(). Check that this either // a SETCC or a previously handled SETCC by convertMask(). 
#ifndef NDEBUG @@ -3832,7 +3854,7 @@ return isSETCCorConvertedSETCC(N.getOperand(0)) && isSETCCorConvertedSETCC(N.getOperand(1)); - return (N.getOpcode() == ISD::SETCC || + return (isSETCCOp(N.getOpcode()) || ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -3847,10 +3869,17 @@ assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. + SDValue Mask; SmallVector Ops; for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); + if (InMask->isStrictFPOpcode()) { + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), + { MaskVT, MVT::Other }, Ops); + ReplaceValueWith(InMask.getValue(1), Mask.getValue(1)); + } + else + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. @@ -3903,7 +3932,7 @@ if (N->getOpcode() != ISD::VSELECT) return SDValue(); - if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode())) + if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode())) return SDValue(); // If this is a splitted VSELECT that was previously already handled, do @@ -3926,8 +3955,8 @@ return SDValue(); // If there is support for an i1 vector mask, don't touch. 
- if (Cond.getOpcode() == ISD::SETCC) { - EVT SetCCOpVT = Cond->getOperand(0).getValueType(); + if (isSETCCOp(Cond.getOpcode())) { + EVT SetCCOpVT = getSETCCOperandType(Cond); while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal) SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT); EVT SetCCResVT = getSetCCResultType(SetCCOpVT); @@ -3958,17 +3987,17 @@ ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); SDValue Mask; - if (Cond->getOpcode() == ISD::SETCC) { - EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType()); + if (isSETCCOp(Cond->getOpcode())) { + EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond)); Mask = convertMask(Cond, MaskVT, ToMaskVT); } else if (isLogicalMaskOp(Cond->getOpcode()) && - Cond->getOperand(0).getOpcode() == ISD::SETCC && - Cond->getOperand(1).getOpcode() == ISD::SETCC) { + isSETCCOp(Cond->getOperand(0).getOpcode()) && + isSETCCOp(Cond->getOperand(1).getOpcode())) { // Cond is (AND/OR/XOR (SETCC, SETCC)) SDValue SETCC0 = Cond->getOperand(0); SDValue SETCC1 = Cond->getOperand(1); - EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType()); - EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType()); + EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0)); + EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1)); unsigned ScalarBits0 = VT0.getScalarSizeInBits(); unsigned ScalarBits1 = VT1.getScalarSizeInBits(); unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7783,6 +7783,7 @@ case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; + case ISD::STRICT_FSETCC: NewOpc = ISD::SETCC; break; } assert(Node->getNumValues() == 
2 && "Unexpected number of results!"); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6164,6 +6164,7 @@ case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: + case Intrinsic::experimental_constrained_fcmp: visitConstrainedFPIntrinsic(cast(I)); return; case Intrinsic::fmuladd: { @@ -7027,6 +7028,12 @@ case Intrinsic::experimental_constrained_trunc: Opcode = ISD::STRICT_FTRUNC; break; + case Intrinsic::experimental_constrained_fcmp: { + Opcode = ISD::STRICT_FSETCC; + auto Pred = dyn_cast(&FPI)->getPredicate(); + Opers.push_back(DAG.getCondCode(getFCmpCondCode(Pred))); + break; + } } SDVTList VTs = DAG.getVTList(ValueVTs); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -270,6 +270,7 @@ case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCCARRY: return "setcccarry"; + case ISD::STRICT_FSETCC: return "strict_fsetcc"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -726,6 +726,7 @@ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand); setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand); + setOperationAction(ISD::STRICT_FSETCC, VT, Expand); // For most targets @llvm.get.dynamic.area.offset just returns 0. 
 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); Index: llvm/lib/IR/IntrinsicInst.cpp =================================================================== --- llvm/lib/IR/IntrinsicInst.cpp +++ llvm/lib/IR/IntrinsicInst.cpp @@ -121,6 +121,30 @@ return StrToExceptionBehavior(cast<MDString>(MD)->getString()); } +FCmpInst::Predicate +ConstrainedFPCmpIntrinsic::getPredicate() const { + Metadata *MD = + cast<MetadataAsValue>(getArgOperand(2))->getMetadata(); + if (!MD || !isa<MDString>(MD)) + return FCmpInst::BAD_FCMP_PREDICATE; + return StringSwitch<FCmpInst::Predicate>(cast<MDString>(MD)->getString()) + .Case("oeq", FCmpInst::FCMP_OEQ) + .Case("ogt", FCmpInst::FCMP_OGT) + .Case("oge", FCmpInst::FCMP_OGE) + .Case("olt", FCmpInst::FCMP_OLT) + .Case("ole", FCmpInst::FCMP_OLE) + .Case("one", FCmpInst::FCMP_ONE) + .Case("ord", FCmpInst::FCMP_ORD) + .Case("uno", FCmpInst::FCMP_UNO) + .Case("ueq", FCmpInst::FCMP_UEQ) + .Case("ugt", FCmpInst::FCMP_UGT) + .Case("uge", FCmpInst::FCMP_UGE) + .Case("ult", FCmpInst::FCMP_ULT) + .Case("ule", FCmpInst::FCMP_ULE) + .Case("une", FCmpInst::FCMP_UNE) + .Default(FCmpInst::BAD_FCMP_PREDICATE); +} + bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -4330,6 +4330,7 @@ case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: + case Intrinsic::experimental_constrained_fcmp: visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(Call)); break; case Intrinsic::dbg_declare: // llvm.dbg.declare @@ -4827,6 +4828,16 @@ HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_fcmp: { + Assert((NumOperands == 4), "invalid arguments for constrained FP intrinsic", + &FPI); + auto Pred = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI)->getPredicate(); + Assert(CmpInst::isFPPredicate(Pred), + "invalid predicate for constrained FP comparison intrinsic", &FPI); + 
HasExceptionMD = true; + break; + } + case Intrinsic::experimental_constrained_fptosi: case Intrinsic::experimental_constrained_fptoui: { Assert((NumOperands == 2), Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -58,7 +58,7 @@ ICMP, // Floating-point comparisons. The two operands are the values to compare. - FCMP, + FCMP, STRICT_FCMP, // Test under mask. The first operand is ANDed with the second operand // and the condition codes are set on the result. The third operand is @@ -248,9 +248,9 @@ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and // greater than" and VFCMPHE for "ordered and greater than or equal to". - VFCMPE, - VFCMPH, - VFCMPHE, + VFCMPE, STRICT_VFCMPE, + VFCMPH, STRICT_VFCMPH, + VFCMPHE, STRICT_VFCMPHE, // Likewise, but also set the condition codes on the result. VFCMPES, @@ -262,7 +262,7 @@ // Extend the even f32 elements of vector operand 0 to produce a vector // of f64 elements. - VEXTEND, + VEXTEND, STRICT_VEXTEND, // Round the f64 elements of vector operand 0 to f32s and store them in the // even elements of the result. @@ -530,11 +530,13 @@ // Implement LowerOperation for individual opcodes. 
SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue CmpOp0, SDValue CmpOp1) const; + SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const; SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, - SDValue CmpOp0, SDValue CmpOp1) const; + SDValue CmpOp0, SDValue CmpOp1, + SDValue Chain = SDValue()) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -32,12 +32,16 @@ namespace { // Represents information about a comparison. struct Comparison { - Comparison(SDValue Op0In, SDValue Op1In) - : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} + Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) + : Op0(Op0In), Op1(Op1In), Chain(ChainIn), + Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} // The operands to the comparison. SDValue Op0, Op1; + // Chain if this is a strict floating-point comparison. + SDValue Chain; + // The opcode that should be used to compare Op0 and Op1. unsigned Opcode; @@ -132,6 +136,7 @@ if (isTypeLegal(VT)) { // Lower SET_CC into an IPM-based sequence. setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). setOperationAction(ISD::SELECT, VT, Expand); @@ -373,6 +378,7 @@ // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands // and inverting the result as necessary. 
setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); } } @@ -2164,6 +2170,10 @@ // negation to set CC, so avoiding separate LOAD AND TEST and // LOAD (NEGATIVE/COMPLEMENT) instructions. static void adjustForFNeg(Comparison &C) { + // This optimization is invalid for strict comparisons, since FNEG + // does not raise any exceptions. + if (C.Chain) + return; auto *C1 = dyn_cast(C.Op1); if (C1 && C1->isZero()) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -2451,7 +2461,7 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond) { - Comparison C(Call, SDValue()); + Comparison C(Call, SDValue(), SDValue()); C.Opcode = Opcode; C.CCValid = CCValid; if (Cond == ISD::SETEQ) @@ -2482,8 +2492,10 @@ // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond, const SDLoc &DL) { + ISD::CondCode Cond, const SDLoc &DL, + SDValue Chain = SDValue()) { if (CmpOp1.getOpcode() == ISD::Constant) { + assert(!Chain); uint64_t Constant = cast(CmpOp1)->getZExtValue(); unsigned Opcode, CCValid; if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && @@ -2495,13 +2507,14 @@ isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); } - Comparison C(CmpOp0, CmpOp1); + Comparison C(CmpOp0, CmpOp1, Chain); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { C.CCValid = SystemZ::CCMASK_FCMP; - C.Opcode = SystemZISD::FCMP; + C.Opcode = C.Chain? SystemZISD::STRICT_FCMP : SystemZISD::FCMP; adjustForFNeg(C); } else { + assert(!C.Chain); C.CCValid = SystemZ::CCMASK_ICMP; C.Opcode = SystemZISD::ICMP; // Choose the type of comparison. 
Equality and inequality tests can @@ -2559,6 +2572,10 @@ return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); } + if (C.Chain) { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); + } return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } @@ -2603,24 +2620,46 @@ } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot -// be done directly. IsFP is true if CC is for a floating-point rather than -// integer comparison. -static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { +// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP +// for regular floating-point comparisons, and CmpMode::StrictFP for strict +// floating-point comparisons. +enum class CmpMode { Int, FP, StrictFP }; +static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { switch (CC) { case ISD::SETOEQ: case ISD::SETEQ: - return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE; + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPE; + case CmpMode::FP: return SystemZISD::VFCMPE; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; + default: llvm_unreachable("Bad mode"); + } case ISD::SETOGE: case ISD::SETGE: - return IsFP ? SystemZISD::VFCMPHE : static_cast(0); + switch (Mode) { + case CmpMode::Int: return 0; + case CmpMode::FP: return SystemZISD::VFCMPHE; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; + default: llvm_unreachable("Bad mode"); + } case ISD::SETOGT: case ISD::SETGT: - return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH; + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPH; + case CmpMode::FP: return SystemZISD::VFCMPH; + case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; + default: llvm_unreachable("Bad mode"); + } case ISD::SETUGT: - return IsFP ? 
static_cast(0) : SystemZISD::VICMPHL; + switch (Mode) { + case CmpMode::Int: return SystemZISD::VICMPHL; + case CmpMode::FP: return 0; + case CmpMode::StrictFP: return 0; + default: llvm_unreachable("Bad mode"); + } default: return 0; @@ -2629,17 +2668,16 @@ // Return the SystemZISD vector comparison operation for CC or its inverse, // or 0 if neither can be done directly. Indicate in Invert whether the -// result is for the inverse of CC. IsFP is true if CC is for a -// floating-point rather than integer comparison. -static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, +// result is for the inverse of CC. Mode is as above. +static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert) { - if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = false; return Opcode; } - CC = ISD::getSetCCInverse(CC, !IsFP); - if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int); + if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = true; return Opcode; } @@ -2648,44 +2686,69 @@ } // Return a v2f64 that contains the extended form of elements Start and Start+1 -// of v4f32 value Op. +// of v4f32 value Op. If Chain is nonnull, return the strict form. static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, - SDValue Op) { + SDValue Op, SDValue Chain) { int Mask[] = { Start, -1, Start + 1, -1 }; Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); + return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); + } return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); } // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, -// producing a result of type VT. +// producing a result of type VT. If Chain is nonnull, return the strict form. 
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, SDValue CmpOp0, - SDValue CmpOp1) const { + SDValue CmpOp1, + SDValue Chain) const { // There is no hardware support for v4f32 (unless we have the vector // enhancements facility 1), so extend the vector into two v2f64s // and compare those. if (CmpOp0.getValueType() == MVT::v4f32 && !Subtarget.hasVectorEnhancements1()) { - SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); - SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); - SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); - SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); + SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); + SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); + SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); + SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); + if (Chain) { + SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); + SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); + SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); + SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), + H1.getValue(1), L1.getValue(1), + HRes.getValue(1), LRes.getValue(1) }; + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue Ops[2] = { Res, NewChain }; + return DAG.getMergeValues(Ops, DL); + } SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); } + if (Chain) { + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); + } return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); } // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing -// an integer mask of type VT. +// an integer mask of type VT. 
If Chain is nonnull, we have a strict +// floating-point comparison. SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, - SDValue CmpOp1) const { + SDValue CmpOp1, + SDValue Chain) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); + assert (!Chain || IsFP); + CmpMode Mode = Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; bool Invert = false; SDValue Cmp; switch (CC) { @@ -2695,9 +2758,14 @@ LLVM_FALLTHROUGH; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); - SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); - SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GE.getValue(1)); break; } @@ -2707,9 +2775,14 @@ LLVM_FALLTHROUGH; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); - SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); - SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); + SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp1, CmpOp0, Chain); + SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), + DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + LT.getValue(1), GT.getValue(1)); break; } @@ -2717,15 +2790,17 @@ // matter whether we try the inversion or the swap first, since // there are no cases where both work. 
default: - if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) - Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); else { CC = ISD::getSetCCSwappedOperands(CC); - if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) - Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); else llvm_unreachable("Unhandled comparison"); } + if (Chain) + Chain = Cmp.getValue(1); break; } if (Invert) { @@ -2733,6 +2808,10 @@ DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } + if (Chain && Chain.getNode() != Cmp.getNode()) { + SDValue Ops[2] = { Cmp, Chain }; + Cmp = DAG.getMergeValues(Ops, DL); + } return Cmp; } @@ -2751,6 +2830,27 @@ return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); } +SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue CmpOp0 = Op.getOperand(1); + SDValue CmpOp1 = Op.getOperand(2); + ISD::CondCode CC = cast(Op.getOperand(3))->get(); + SDLoc DL(Op); + EVT VT = Op.getNode()->getValueType(0); + if (VT.isVector()) { + SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, Chain); + return Res.getValue(Op.getResNo()); + } + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain)); + SDValue CCReg = emitCmp(DAG, DL, C); + CCReg->setFlags(Op->getFlags()); + SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); + SDValue Ops[2] = { Result, CCReg.getValue(1) }; + return DAG.getMergeValues(Ops, DL); +} + SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); @@ -4962,6 +5062,8 @@ 
return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); + case ISD::STRICT_FSETCC: + return lowerSTRICT_FSETCC(Op, DAG); case ISD::GlobalAddress: return lowerGlobalAddress(cast(Op), DAG); case ISD::GlobalTLSAddress: @@ -5167,6 +5269,7 @@ OPCODE(IABS); OPCODE(ICMP); OPCODE(FCMP); + OPCODE(STRICT_FCMP); OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); @@ -5229,13 +5332,17 @@ OPCODE(VICMPHS); OPCODE(VICMPHLS); OPCODE(VFCMPE); + OPCODE(STRICT_VFCMPE); OPCODE(VFCMPH); + OPCODE(STRICT_VFCMPH); OPCODE(VFCMPHE); + OPCODE(STRICT_VFCMPHE); OPCODE(VFCMPES); OPCODE(VFCMPHS); OPCODE(VFCMPHES); OPCODE(VFTCI); OPCODE(VEXTEND); + OPCODE(STRICT_VEXTEND); OPCODE(VROUND); OPCODE(VTM); OPCODE(VFAE_CC); Index: llvm/lib/Target/SystemZ/SystemZInstrFP.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -537,12 +537,12 @@ //===----------------------------------------------------------------------===// let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in { - def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>; - def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>; - def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>; + def CEBR : CompareRRE<"cebr", 0xB309, z_any_fcmp, FP32, FP32>; + def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>; + def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>; - def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; + def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>; def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32, FP32>; def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64, FP64>; Index: llvm/lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- 
llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1134,7 +1134,7 @@ // Load lengthened. let Uses = [FPC], mayRaiseFPException = 1 in { def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; - def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>; def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>; } let Predicates = [FeatureVectorEnhancements1] in { @@ -1364,10 +1364,10 @@ // Compare scalar. let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; - def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>; let Predicates = [FeatureVectorEnhancements1] in { - def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; - def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>; + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>; } } @@ -1384,12 +1384,12 @@ // Compare equal. let Uses = [FPC], mayRaiseFPException = 1 in { def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>; - defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_any_vfcmpe, z_vfcmpes, v128f, v128sb, 2, 0>; defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1416,12 +1416,12 @@ // Compare high. 
let Uses = [FPC], mayRaiseFPException = 1 in { def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; - defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_any_vfcmph, z_vfcmphs, v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_any_vfcmph, z_vfcmphs, v128f, v128sb, 2, 0>; defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, v32f, v32sb, 2, 8>; @@ -1448,12 +1448,12 @@ // Compare high or equal. let Uses = [FPC], mayRaiseFPException = 1 in { def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; - defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; let Predicates = [FeatureVectorEnhancements1] in { - defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_any_vfcmphe, z_vfcmphes, v128f, v128sb, 2, 0>; defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, v32f, v32sb, 2, 8>; Index: llvm/lib/Target/SystemZ/SystemZOperators.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZOperators.td +++ llvm/lib/Target/SystemZ/SystemZOperators.td @@ -258,6 +258,8 @@ def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>; def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; +def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, + [SDNPHasChain]>; def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>; def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", 
SDT_ZBRCCMask, [SDNPHasChain]>; @@ -328,12 +330,20 @@ def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>; def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>; def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; +def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; +def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; +def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; +def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; @@ -707,6 +717,23 @@ // Floating-point negative absolute. def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; +// Strict floating-point fragments. 
+def z_any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_fcmp node:$lhs, node:$rhs), + (z_fcmp node:$lhs, node:$rhs)]>; +def z_any_vfcmpe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmpe node:$lhs, node:$rhs), + (z_vfcmpe node:$lhs, node:$rhs)]>; +def z_any_vfcmph : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmph node:$lhs, node:$rhs), + (z_vfcmph node:$lhs, node:$rhs)]>; +def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs), + [(z_strict_vfcmphe node:$lhs, node:$rhs), + (z_vfcmphe node:$lhs, node:$rhs)]>; +def z_any_vextend : PatFrags<(ops node:$src), + [(z_strict_vextend node:$src), + (z_vextend node:$src)]>; + // Create a unary operator that loads from memory and then performs // the given operation on it. class loadu Index: llvm/lib/Target/SystemZ/SystemZPatterns.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZPatterns.td +++ llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -148,9 +148,9 @@ // registers in CLS against zero. The instruction has separate R1 and R2 // operands, but they must be the same when the instruction is used like this. multiclass CompareZeroFP { - def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; + def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; // The sign of the zero makes no difference. - def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; + def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; } // Use INSN for performing binary operation OPERATION of type VT Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll @@ -0,0 +1,435 @@ +; Test 32-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare float @foo() + +; Check comparison with registers. +define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) #0 { +; CHECK-LABEL: f1: +; CHECK: cebr %f0, %f2 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the CEB range. +define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned CEB range. +define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f3: +; CHECK: ceb %f0, 4092(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1023 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f4: +; CHECK: aghi %r4, 4096 +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 1024 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. +define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) #0 { +; CHECK-LABEL: f5: +; CHECK: aghi %r4, -4 +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float, float *%base, i64 -1 + %f2 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that CEB allows indices. +define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) #0 { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r5, 2 +; CHECK: ceb %f0, 400(%r1,%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%base, i64 %index + %ptr2 = getelementptr float, float *%ptr1, i64 100 + %f2 = load float, float *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that comparisons of spilled values can use CEB rather than CEBR. 
+define float @f7(float *%ptr0) #0 { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr float, float *%ptr0, i64 2 + %ptr2 = getelementptr float, float *%ptr0, i64 4 + %ptr3 = getelementptr float, float *%ptr0, i64 6 + %ptr4 = getelementptr float, float *%ptr0, i64 8 + %ptr5 = getelementptr float, float *%ptr0, i64 10 + %ptr6 = getelementptr float, float *%ptr0, i64 12 + %ptr7 = getelementptr float, float *%ptr0, i64 14 + %ptr8 = getelementptr float, float *%ptr0, i64 16 + %ptr9 = getelementptr float, float *%ptr0, i64 18 + %ptr10 = getelementptr float, float *%ptr0, i64 20 + + %val0 = load float, float *%ptr0 + %val1 = load float, float *%ptr1 + %val2 = load float, float *%ptr2 + %val3 = load float, float *%ptr3 + %val4 = load float, float *%ptr4 + %val5 = load float, float *%ptr5 + %val6 = load float, float *%ptr6 + %val7 = load float, float *%ptr7 + %val8 = load float, float *%ptr8 + %val9 = load float, float *%ptr9 + %val10 = load float, float *%ptr10 + + %ret = call float @foo() #0 + + %cmp0 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp1 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val1, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp2 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp3 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val3, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp4 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val4, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp5 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val5, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp6 = call i1 @llvm.experimental.constrained.fcmp.f32( + float 
%ret, float %val6, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp7 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val7, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp8 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val8, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp9 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val9, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp10 = call i1 @llvm.experimental.constrained.fcmp.f32( + float %ret, float %val10, + metadata !"olt", + metadata !"fpexcept.strict") #0 + + %sel0 = select i1 %cmp0, float %ret, float 0.0 + %sel1 = select i1 %cmp1, float %sel0, float 1.0 + %sel2 = select i1 %cmp2, float %sel1, float 2.0 + %sel3 = select i1 %cmp3, float %sel2, float 3.0 + %sel4 = select i1 %cmp4, float %sel3, float 4.0 + %sel5 = select i1 %cmp5, float %sel4, float 5.0 + %sel6 = select i1 %cmp6, float %sel5, float 6.0 + %sel7 = select i1 %cmp7, float %sel6, float 7.0 + %sel8 = select i1 %cmp8, float %sel7, float 8.0 + %sel9 = select i1 %cmp9, float %sel8, float 9.0 + %sel10 = select i1 %cmp10, float %sel9, float 10.0 + + ret float %sel10 +} + +; Check comparison with zero. +define i64 @f8(i64 %a, i64 %b, float %f) #0 { +; CHECK-LABEL: f8: +; CHECK: ltebr %f0, %f0 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the comparison can be reversed if that allows CEB to be used, +; first with oeq. 
+define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f9: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then one. +define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f10: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: blhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then olt. +define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f11: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ole. +define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f12: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnhe %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then oge. 
+define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f13: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnle %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ogt. +define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f14: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: blr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ueq. +define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f15: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnlhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrlh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then une. +define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f16: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bner %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgre %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ult. 
+define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f17: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrle %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ule. +define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f18: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnlr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrl %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then uge. +define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f19: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrh %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ugt. 
+define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) #0 { +; CHECK-LABEL: f20: +; CHECK: ceb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: bnher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrhe %r2, %r3 +; CHECK: br %r14 + %f1 = load float, float *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll @@ -0,0 +1,249 @@ +; Test 64-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs\ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare double @foo() + +; Check comparison with registers. +define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) #0 { +; CHECK-LABEL: f1: +; CHECK: cdbr %f0, %f2 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the CDB range. 
+define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: cdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned CDB range. +define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f3: +; CHECK: cdb %f0, 4088(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 511 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f4: +; CHECK: aghi %r4, 4096 +; CHECK: cdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 512 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. 
+define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) #0 { +; CHECK-LABEL: f5: +; CHECK: aghi %r4, -8 +; CHECK: cdb %f0, 0(%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i64 -1 + %f2 = load double, double *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that CDB allows indices. +define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) #0 { +; CHECK-LABEL: f6: +; CHECK: sllg %r1, %r5, 3 +; CHECK: cdb %f0, 800(%r1,%r4) +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%base, i64 %index + %ptr2 = getelementptr double, double *%ptr1, i64 100 + %f2 = load double, double *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that comparisons of spilled values can use CDB rather than CDBR. 
+define double @f7(double *%ptr0) #0 { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr double, double *%ptr0, i64 2 + %ptr2 = getelementptr double, double *%ptr0, i64 4 + %ptr3 = getelementptr double, double *%ptr0, i64 6 + %ptr4 = getelementptr double, double *%ptr0, i64 8 + %ptr5 = getelementptr double, double *%ptr0, i64 10 + %ptr6 = getelementptr double, double *%ptr0, i64 12 + %ptr7 = getelementptr double, double *%ptr0, i64 14 + %ptr8 = getelementptr double, double *%ptr0, i64 16 + %ptr9 = getelementptr double, double *%ptr0, i64 18 + %ptr10 = getelementptr double, double *%ptr0, i64 20 + + %val0 = load double, double *%ptr0 + %val1 = load double, double *%ptr1 + %val2 = load double, double *%ptr2 + %val3 = load double, double *%ptr3 + %val4 = load double, double *%ptr4 + %val5 = load double, double *%ptr5 + %val6 = load double, double *%ptr6 + %val7 = load double, double *%ptr7 + %val8 = load double, double *%ptr8 + %val9 = load double, double *%ptr9 + %val10 = load double, double *%ptr10 + + %ret = call double @foo() #0 + + %cmp0 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp1 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val1, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp2 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp3 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val3, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp4 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val4, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp5 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val5, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp6 = call i1 
@llvm.experimental.constrained.fcmp.f64( + double %ret, double %val6, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp7 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val7, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp8 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val8, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp9 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val9, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %cmp10 = call i1 @llvm.experimental.constrained.fcmp.f64( + double %ret, double %val10, + metadata !"olt", + metadata !"fpexcept.strict") #0 + + %sel0 = select i1 %cmp0, double %ret, double 0.0 + %sel1 = select i1 %cmp1, double %sel0, double 1.0 + %sel2 = select i1 %cmp2, double %sel1, double 2.0 + %sel3 = select i1 %cmp3, double %sel2, double 3.0 + %sel4 = select i1 %cmp4, double %sel3, double 4.0 + %sel5 = select i1 %cmp5, double %sel4, double 5.0 + %sel6 = select i1 %cmp6, double %sel5, double 6.0 + %sel7 = select i1 %cmp7, double %sel6, double 7.0 + %sel8 = select i1 %cmp8, double %sel7, double 8.0 + %sel9 = select i1 %cmp9, double %sel8, double 9.0 + %sel10 = select i1 %cmp10, double %sel9, double 10.0 + + ret double %sel10 +} + +; Check comparison with zero. 
+define i64 @f8(i64 %a, i64 %b, double %f) #0 {
+; CHECK-LABEL: f8:
+; CHECK-SCALAR: ltdbr %f0, %f0
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR: ltdbr %f0, %f0
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+  %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+                        double %f, double 0.0,
+                        metadata !"oeq",
+                        metadata !"fpexcept.strict") #0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CDB to be used.
+define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) #0 {
+; CHECK-LABEL: f9:
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
+; CHECK: br %r14
+  %f1 = load double, double *%ptr
+  %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+                        double %f1, double %f2,
+                        metadata !"ogt",
+                        metadata !"fpexcept.strict") #0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+
Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll
@@ -0,0 +1,47 @@
+; Test 128-bit floating-point comparison. The tests assume a z10 implementation
+; of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; There is no memory form of 128-bit comparison.
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: lxebr %f0, %f0 +; CHECK-DAG: ld %f1, 0(%r4) +; CHECK-DAG: ld %f3, 8(%r4) +; CHECK: cxbr %f1, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f2x = fpext float %f2 to fp128 + %f1 = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2x, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check comparison with zero. +define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK: ld %f0, 0(%r4) +; CHECK: ld %f2, 8(%r4) +; CHECK: ltxbr %f0, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f, fp128 0xL00000000000000000000000000000000, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll @@ -0,0 +1,461 @@ +; Test that floating-point compares are omitted if CC already has the +; right value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as | FileCheck %s + +declare float @llvm.fabs.f32(float %f) + +; Test addition followed by EQ, which can use the CC result of the addition. 
+define float @f1(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f1: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with LT. +define float @f2(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f2: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with GT. +define float @f3(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f3: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: bhr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; ...and again with UEQ. 
+define float @f4(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f4:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: bnlhr %r14
+; CHECK: br %r14
+entry:
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %a, float %b,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+                        float %res, float 0.0,
+                        metadata !"ueq",
+                        metadata !"fpexcept.strict") #0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Subtraction also provides a zero-based CC value.
+define float @f5(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f5:
+; CHECK: seb %f0, 0(%r2)
+; CHECK-NEXT: bnher %r14
+; CHECK: br %r14
+entry:
+  %cur = load float, float *%dest
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %a, float %cur,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+                        float %res, float 0.0,
+                        metadata !"ult",
+                        metadata !"fpexcept.strict") #0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test the result of LOAD POSITIVE.
+define float @f6(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f6:
+; CHECK: lpebr %f0, %f2
+; CHECK-NEXT: bhr %r14
+; CHECK: br %r14
+entry:
+  %res = call float @llvm.fabs.f32(float %a)
+  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+                        float %res, float 0.0,
+                        metadata !"ogt",
+                        metadata !"fpexcept.strict") #0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %res, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test the result of LOAD NEGATIVE.
+define float @f7(float %dummy, float %a, float *%dest) #0 { +; CHECK-LABEL: f7: +; CHECK: lnebr %f0, %f2 +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %abs = call float @llvm.fabs.f32(float %a) + %res = fsub float -0.0, %abs + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test the result of LOAD COMPLEMENT. +define float @f8(float %dummy, float %a, float *%dest) #0 { +; CHECK-LABEL: f8: +; CHECK: lcebr %f0, %f2 +; CHECK-NEXT: bler %r14 +; CHECK: br %r14 +entry: + %res = fsub float -0.0, %a + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"ole", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Multiplication (for example) does not modify CC. +define float @f9(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f9: +; CHECK: meebr %f0, %f2 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: blhr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fmul.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"one", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test a combination involving a CC-setting instruction followed by +; a non-CC-setting instruction. 
+define float @f10(float %a, float %b, float %c, float *%dest) #0 { +; CHECK-LABEL: f10: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: debr %f0, %f4 +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: bner %r14 +; CHECK: br %r14 +entry: + %add = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %res = call float @llvm.experimental.constrained.fdiv.f32( + float %add, float %c, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float 0.0, + metadata !"une", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test a case where CC is set based on a different register from the +; compare input. +define float @f11(float %a, float %b, float %c, float *%dest1, float *%dest2) #0 { +; CHECK-LABEL: f11: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: sebr %f4, %f0 +; CHECK-DAG: ste %f4, 0(%r2) +; CHECK-DAG: ltebr %f0, %f0 +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 +entry: + %add = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %sub = call float @llvm.experimental.constrained.fsub.f32( + float %c, float %add, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store float %sub, float *%dest1 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %add, float 0.0, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %sub, float *%dest2 + br label %exit + +exit: + ret float %add +} + +; Test that LER gets converted to LTEBR where useful. 
+define float @f12(float %dummy, float %val, float *%dest) #0 { +; CHECK-LABEL: f12: +; CHECK: ltebr %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f0}"(float %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %val, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %val, float *%dest + br label %exit + +exit: + ret float %val +} + +; Test that LDR gets converted to LTDBR where useful. +define double @f13(double %dummy, double %val, double *%dest) #0 { +; CHECK-LABEL: f13: +; CHECK: ltdbr %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f0}"(double %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f64( + double %val, double 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store double %val, double *%dest + br label %exit + +exit: + ret double %val +} + +; Test that LXR gets converted to LTXBR where useful. 
+define void @f14(fp128 *%ptr1, fp128 *%ptr2) #0 { +; CHECK-LABEL: f14: +; CHECK: ltxbr +; CHECK-NEXT: dxbr +; CHECK-NEXT: std +; CHECK-NEXT: std +; CHECK-NEXT: mxbr +; CHECK-NEXT: std +; CHECK-NEXT: std +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %val1 = load fp128, fp128 *%ptr1 + %val2 = load fp128, fp128 *%ptr2 + %div = fdiv fp128 %val1, %val2 + store fp128 %div, fp128 *%ptr1 + %mul = fmul fp128 %val1, %val2 + store fp128 %mul, fp128 *%ptr2 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %val1, fp128 0xL00000000000000000000000000000000, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() + br label %exit + +exit: + ret void +} + +; Test a case where it is the source rather than destination of LER that +; we need. +define float @f15(float %val, float %dummy, float *%dest) #0 { +; CHECK-LABEL: f15: +; CHECK: ltebr %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f2}"(float %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %val, float 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %val, float *%dest + br label %exit + +exit: + ret float %val +} + +; Test a case where it is the source rather than destination of LDR that +; we need. 
+define double @f16(double %val, double %dummy, double *%dest) #0 { +; CHECK-LABEL: f16: +; CHECK: ltdbr %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{f2}"(double %val) + %cmp = call i1 @llvm.experimental.constrained.fcmp.f64( + double %val, double 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store double %val, double *%dest + br label %exit + +exit: + ret double %val +} + +; Repeat f2 with a comparison against -0. +define float @f17(float %a, float %b, float *%dest) #0 { +; CHECK-LABEL: f17: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: blr %r14 +; CHECK: br %r14 +entry: + %res = call float @llvm.experimental.constrained.fadd.f32( + float %a, float %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( + float %res, float -0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) Index: llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll @@ -0,0 +1,44 @@ +; Test f128 comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; There is no memory form of 128-bit comparison. +define i64 @f1(i64 %a, i64 %b, fp128 *%ptr1, fp128 *%ptr2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r5) +; CHECK: wfcxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f1 = load fp128, fp128 *%ptr1 + %f2 = load fp128, fp128 *%ptr2 + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check comparison with zero -- it is not worthwhile to copy to +; FP pairs just so we can use LTXBR, so simply load up a zero. +define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4) +; CHECK-DAG: vzero [[REG2:%v[0-9]+]] +; CHECK: wfcxb [[REG1]], [[REG2]] +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f = load fp128, fp128 *%ptr + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f, fp128 0xL00000000000000000000000000000000, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/vec-strict-cmp-05.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmp-05.ll @@ -0,0 +1,560 @@ +; Test strict v4f32 comparisons. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test oeq. 
+define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test one. +define <4 x i32> @f2(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vo %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata 
!"one", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ogt. +define <4 x i32> @f3(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oge. +define <4 x i32> @f4(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ole. 
+define <4 x i32> @f5(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test olt. +define <4 x i32> @f6(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ueq. 
+define <4 x i32> @f7(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vno %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test une. 
+define <4 x i32> @f8(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ugt. +define <4 x i32> @f9(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uge. 
+define <4 x i32> @f10(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ule. +define <4 x i32> @f11(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ult. 
+define <4 x i32> @f12(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] +; CHECK-NEXT: vno %v24, [[RES]], [[RES]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ord. +define <4 x i32> @f13(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vo %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> 
@llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uno. +define <4 x i32> @f14(<4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] +; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] +; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] +; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] +; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] +; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] +; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] +; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] +; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] +; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] +; CHECK: vno %v24, [[RES1]], [[RES0]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oeq selects. +define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test one selects. 
+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ogt selects. +define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test oge selects. +define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f18: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ole selects. 
+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f19: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test olt selects. +define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f20: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ueq selects. +define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f21: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test une selects. 
+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f22: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ugt selects. +define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f23: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uge selects. +define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f24: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ule selects. 
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f25: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ult selects. +define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f26: +; CHECK: vpkg [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ord selects. +define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f27: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uno selects. 
+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f28: +; CHECK: vo [[REG:%v[0-9]+]], +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +attributes #0 = { strictfp } + +declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata) Index: llvm/test/CodeGen/SystemZ/vec-strict-cmp-06.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmp-06.ll @@ -0,0 +1,442 @@ +; Test f64 and v2f64 strict comparisons. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test oeq. +define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK: vfcedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test one. +define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ogt. 
+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK: vfchdb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oge. +define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK: vfchedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ole. +define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK: vfchedb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test olt. +define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK: vfchdb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ueq. 
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test une. +define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ugt. +define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uge. +define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ule. 
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ult. +define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ord. +define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uno. +define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oeq selects. 
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test one selects. +define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ogt selects. +define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test oge selects. 
+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f18: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ole selects. +define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f19: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test olt selects. +define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f20: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ueq selects. 
+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f21: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test une selects. +define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f22: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ugt selects. +define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f23: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uge selects. 
+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f24: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ule selects. +define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f25: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ult selects. +define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f26: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ord selects. 
+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f27: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uno selects. +define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) #0 { +; CHECK-LABEL: f28: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %val1, <2 x double> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test an f64 comparison that uses vector registers. 
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) #0 { +; CHECK-LABEL: f29: +; CHECK: wfcdb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <2 x double> %vec, i32 0 + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +attributes #0 = { strictfp } + +declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) + Index: llvm/test/CodeGen/SystemZ/vec-strict-cmp-07.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/vec-strict-cmp-07.ll @@ -0,0 +1,442 @@ +; Test strict f32 and v4f32 comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test oeq. +define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f1: +; CHECK: vfcesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test one. +define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f2: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ogt. 
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f3: +; CHECK: vfchsb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oge. +define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f4: +; CHECK: vfchesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ole. +define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f5: +; CHECK: vfchesb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ole", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test olt. +define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f6: +; CHECK: vfchsb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"olt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ueq. 
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f7: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ueq", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test une. +define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f8: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"une", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ugt. +define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 { +; CHECK-LABEL: f9: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ugt", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uge. +define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f10: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uge", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ule. 
+define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f11: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ule", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ult. +define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f12: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ult", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ord. +define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f13: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ord", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uno. +define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) #0 { +; CHECK-LABEL: f14: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"uno", + metadata !"fpexcept.strict") #0 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oeq selects. 
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f15: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test one selects. +define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f16: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"one", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ogt selects. +define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) #0 { +; CHECK-LABEL: f17: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %val1, <4 x float> %val2, + metadata !"ogt", + metadata !"fpexcept.strict") #0 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test oge selects. 
+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f18:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"oge",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test ole selects (expected as an oge compare with the operands swapped).
+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f19:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"ole",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test olt selects (expected as an ogt compare with the operands swapped).
+define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f20:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"olt",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test ueq selects (complement of one: same masks, vsel operands swapped).
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f21:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"ueq",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test une selects (complement of oeq: vfcesb mask, vsel operands swapped).
+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f22:
+; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"une",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test ugt selects (complement of ole: swapped vfchesb, vsel operands swapped).
+define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f23:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"ugt",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test uge selects (complement of olt: swapped vfchsb, vsel operands swapped).
+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f24:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"uge",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test ule selects (complement of ogt: vfchsb mask, vsel operands swapped).
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f25:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"ule",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test ult selects (complement of oge: vfchesb mask, vsel operands swapped).
+define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f26:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"ult",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test ord selects (olt OR oge masks combined by vo feed vsel).
+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f27:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"ord",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test uno selects (same masks as ord, but with the vsel operands swapped).
+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2,
+                        <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f28:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+  %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+                  <4 x float> %val1, <4 x float> %val2,
+                  metadata !"uno",
+                  metadata !"fpexcept.strict") #0
+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+  ret <4 x float> %ret
+}
+
+; Test an f32 comparison that uses vector registers (scalar wfcsb against
+define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) #0 {
+; CHECK-LABEL: f29:
+; CHECK: wfcsb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+  %f2 = extractelement <4 x float> %vec, i32 0
+  %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+                  float %f1, float %f2,
+                  metadata !"oeq",
+                  metadata !"fpexcept.strict") #0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+