diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -975,13 +975,16 @@ /// Helper function to make it easier to build SetCC's if you just have an /// ISD::CondCode instead of an SDValue. SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, - ISD::CondCode Cond) { + ISD::CondCode Cond, SDValue Chain = SDValue()) { assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() && "Cannot compare scalars to vectors"); assert(LHS.getValueType().isVector() == VT.isVector() && "Cannot compare scalars to vectors"); assert(Cond != ISD::SETCC_INVALID && "Cannot create a setCC of an invalid node."); + if (Chain) + return getNode(ISD::STRICT_FSETCC, DL, { VT, MVT::Other }, + { Chain, LHS, RHS, getCondCode(Cond) }); return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -134,7 +134,7 @@ ArrayRef Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - bool &NeedInvert, const SDLoc &dl); + bool &NeedInvert, const SDLoc &dl, SDValue &Chain); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -1638,7 +1638,8 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, - const SDLoc &dl) { + const SDLoc &dl, + SDValue &Chain) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast(CC)->get(); NeedInvert = false; @@ -1721,13 +1722,16 @@ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). 
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain); } + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), + SetCC2.getValue(1)); LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); @@ -3509,12 +3513,16 @@ } Results.push_back(Tmp1); break; - case ISD::SETCC: { - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - Tmp3 = Node->getOperand(2); + case ISD::SETCC: + case ISD::STRICT_FSETCC: { + bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC; + SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); + unsigned Offset = IsStrict ? 
1 : 0; + Tmp1 = Node->getOperand(0 + Offset); + Tmp2 = Node->getOperand(1 + Offset); + Tmp3 = Node->getOperand(2 + Offset); bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, - Tmp3, NeedInvert, dl); + Tmp3, NeedInvert, dl, Chain); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the @@ -3528,6 +3536,13 @@ if (NeedInvert) Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); + if (IsStrict) { + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Chain); + ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FSETCC node\n"); + return true; + } + Results.push_back(Tmp1); break; } @@ -3559,6 +3574,7 @@ Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False EVT VT = Node->getValueType(0); + SDValue Chain; SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast(CC)->get(); @@ -3603,7 +3619,7 @@ if (!Legalized) { Legalized = LegalizeSetCCCondCode( getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, - dl); + dl, Chain); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); @@ -3629,13 +3645,14 @@ break; } case ISD::BR_CC: { + SDValue Chain; Tmp1 = Node->getOperand(0); // Chain Tmp2 = Node->getOperand(2); // LHS Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( - Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2792,6 +2792,13 @@ BitWidth > 1) Known.Zero.setBitsFrom(1); break; + case ISD::STRICT_FSETCC: + // If we know the result of a setcc has the top bits zero, use 
this info. + if (TLI->getBooleanContents(Op.getOperand(1).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -542,6 +542,10 @@ // this happens we will use 512-bit operations and the mask will not be // zero extended. EVT OpVT = N->getOperand(0).getValueType(); + // The first operand of X86ISD::CMPM is chain, so we need to get the second + // operand. + if (Opcode == X86ISD::CMPM) + OpVT = N->getOperand(1).getValueType(); if (OpVT.is256BitVector() || OpVT.is128BitVector()) return Subtarget->hasVLX(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -79,6 +79,9 @@ /// X86 compare and logical compare instructions. CMP, COMI, UCOMI, + /// X86 strict FP compare instructions. + STRICT_FCMP, + /// X86 bit-test instructions. BT, @@ -1340,6 +1343,7 @@ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -1480,7 +1484,7 @@ /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent, for use with the given x86 condition code. 
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl, - SelectionDAG &DAG) const; + SelectionDAG &DAG, SDValue Chain = SDValue()) const; /// Convert a comparison if required by the subtarget. SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; @@ -1490,7 +1494,7 @@ SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, - SDValue &X86CC) const; + SDValue &X86CC, SDValue &Chain) const; /// Check if replacement of SQRT with RSQRT should be disabled. bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -393,6 +393,7 @@ for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); } for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { if (VT == MVT::i64 && !Subtarget.is64Bit()) @@ -873,6 +874,7 @@ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::ABS, VT, Custom); @@ -1139,6 +1141,7 @@ for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTLZ, VT, Custom); @@ -1301,6 +1304,7 @@ setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); 
setOperationAction(ISD::UADDSAT, VT, Custom); @@ -1429,6 +1433,7 @@ setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use @@ -20129,14 +20134,19 @@ /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - const SDLoc &dl, SelectionDAG &DAG) const { + const SDLoc &dl, SelectionDAG &DAG, + SDValue Chain) const { if (isNullConstant(Op1)) return EmitTest(Op0, X86CC, dl, DAG, Subtarget); EVT CmpVT = Op0.getValueType(); - if (CmpVT.isFloatingPoint()) + if (CmpVT.isFloatingPoint()) { + if (Chain) + return DAG.getNode(X86ISD::STRICT_FCMP, dl, { MVT::i32, MVT::Other }, + { Chain, Op0, Op1 }); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); + } assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!"); @@ -20198,16 +20208,18 @@ SelectionDAG &DAG) const { // If the subtarget does not support the FUCOMI instruction, floating-point // comparisons have to be converted. - if (Subtarget.hasCMov() || - Cmp.getOpcode() != X86ISD::CMP || - !Cmp.getOperand(0).getValueType().isFloatingPoint() || - !Cmp.getOperand(1).getValueType().isFloatingPoint()) + bool IsCmp = Cmp.getOpcode() == X86ISD::CMP; + bool IsStrictCmp = Cmp.getOpcode() == X86ISD::STRICT_FCMP; + + if (Subtarget.hasCMov() || (!IsCmp && !IsStrictCmp) || + !Cmp.getOperand(IsStrictCmp ? 1 : 0).getValueType().isFloatingPoint() || + !Cmp.getOperand(IsStrictCmp ? 2 : 1).getValueType().isFloatingPoint()) return Cmp; // The instruction selector will select an FUCOM instruction instead of // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. 
Hence // build an SDNode sequence that transfers the result from FPSW into EFLAGS: - // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8)))) + // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86any_fcmp ...)), 8)))) SDLoc dl(Cmp); SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp); SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW); @@ -20634,12 +20646,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue CC = Op.getOperand(2); - MVT VT = Op.getSimpleValueType(); + bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC; + SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); + SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0); + SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1); + SDValue CC = Op.getOperand(IsStrict ? 3 : 2); + MVT VT = Op->getSimpleValueType(0); ISD::CondCode Cond = cast(CC)->get(); - bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint(); + bool isFP = Op1.getSimpleValueType().isFloatingPoint(); SDLoc dl(Op); if (isFP) { @@ -20679,26 +20693,34 @@ CombineOpc = X86ISD::FAND; } - SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getTargetConstant(CC0, dl, MVT::i8)); - SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getTargetConstant(CC1, dl, MVT::i8)); + SDValue Cmp0 = DAG.getNode(Opc, dl, { VT, MVT::Other }, { Chain, Op0, Op1, + DAG.getTargetConstant(CC0, dl, MVT::i8) }); + SDValue Cmp1 = DAG.getNode(Opc, dl, { VT, MVT::Other }, { Chain, Op0, Op1, + DAG.getTargetConstant(CC1, dl, MVT::i8) }); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1), + Cmp1.getValue(1)); Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } else { // Handle all other FP comparisons here. 
- Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getTargetConstant(SSECC, dl, MVT::i8)); + Cmp = DAG.getNode(Opc, dl, { VT, MVT::Other }, { Chain, Op0, Op1, + DAG.getTargetConstant(SSECC, dl, MVT::i8) }); + Chain = Cmp.getValue(1); } // If this is SSE/AVX CMPP, bitcast the result back to integer to match the // result type of SETCC. The bitcast is expected to be optimized away // during combining/isel. if (Opc == X86ISD::CMPP) - Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp); + Cmp = DAG.getBitcast(Op->getSimpleValueType(0), Cmp); + + if (IsStrict) + return DAG.getMergeValues({ Cmp, Chain }, dl); return Cmp; } + assert(!IsStrict && "Strict SETCC only handles FP operands."); + MVT VTOp0 = Op0.getSimpleValueType(); (void)VTOp0; assert(VTOp0 == Op1.getSimpleValueType() && @@ -21008,8 +21030,8 @@ /// corresponding X86 condition code constant in X86CC. SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, const SDLoc &dl, - SelectionDAG &DAG, - SDValue &X86CC) const { + SelectionDAG &DAG, SDValue &X86CC, + SDValue &Chain) const { // Optimize to BT if possible. // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). 
@@ -21057,7 +21079,9 @@ if (CondCode == X86::COND_INVALID) return SDValue(); - SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG); + SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Chain); + if (Chain) + Chain = EFLAGS.getValue(1); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); return EFLAGS; @@ -21065,19 +21089,23 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getSimpleValueType(); + bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC; + MVT VT = Op->getSimpleValueType(0); if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); + SDValue Op0 = Op.getOperand(IsStrict ? 1: 0); + SDValue Op1 = Op.getOperand(IsStrict ? 2: 1); SDLoc dl(Op); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); + ISD::CondCode CC + = cast(Op.getOperand(IsStrict ? 3 : 2))->get(); // Handle f128 first, since one possible outcome is a normal integer // comparison which gets handled by emitFlagsForSetcc. if (Op0.getValueType() == MVT::f128) { + assert(!IsStrict && "Unhandled strict operation!"); softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1); // If softenSetCCOperands returned a scalar, use it. 
@@ -21089,11 +21117,16 @@ } SDValue X86CC; - SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC); + SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC, Chain); if (!EFLAGS) return SDValue(); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS); + SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS); + + if (IsStrict) + return DAG.getMergeValues({ Res, Chain }, dl); + + return Res; } SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const { @@ -21314,7 +21347,8 @@ } } - if (Cond.getOpcode() == ISD::SETCC) { + if (Cond.getOpcode() == ISD::SETCC || + Cond.getOpcode() == ISD::STRICT_FSETCC) { if (SDValue NewCond = LowerSETCC(Cond, DAG)) { Cond = NewCond; // If the condition was updated, it's possible that the operands of the @@ -22789,6 +22823,24 @@ unsigned IntNo = Op.getConstantOperandVal(0); MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); + + // We share some nodes between STRICT and non-STRICT FP intrinsics. + // For these nodes, we need to chain them to the entry token if they are not + // called by STRICT FP intrinsics. 
+ auto getProperNode = [&](unsigned Opcode, EVT VT, ArrayRef Ops) { + switch (Opcode) { + default: + return DAG.getNode(Opcode, dl, VT, Ops); + case X86ISD::CMPP: + case X86ISD::CMPM: + break; + } + + SmallVector NewOps = { DAG.getEntryNode() }; + NewOps.append(Ops.begin(), Ops.end()); + return DAG.getNode(Opcode, dl, { VT, MVT::Other }, NewOps); + }; + if (IntrData) { switch(IntrData->Type) { case INTR_TYPE_1OP: { @@ -22877,8 +22929,8 @@ return SDValue(); } - return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), - Src1, Src2, Src3); + return getProperNode(IntrData->Opc0, Op.getValueType(), + { Src1, Src2, Src3 }); } case INTR_TYPE_4OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), @@ -23126,8 +23178,8 @@ return SDValue(); } //default rounding mode - return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2), CC); + return getProperNode(IntrData->Opc0, MaskVT, + { Op.getOperand(1), Op.getOperand(2), CC }); } case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); @@ -27730,7 +27782,8 @@ case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::SETCC: + case ISD::STRICT_FSETCC: return LowerSETCC(Op, DAG); case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); @@ -28582,6 +28635,7 @@ case X86ISD::CALL: return "X86ISD::CALL"; case X86ISD::BT: return "X86ISD::BT"; case X86ISD::CMP: return "X86ISD::CMP"; + case X86ISD::STRICT_FCMP: return "X86ISD::STRICT_FCMP"; case X86ISD::COMI: return "X86ISD::COMI"; case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::CMPM: return "X86ISD::CMPM"; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ 
-8683,10 +8683,10 @@ } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, + defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, + defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64, "ucomisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -637,11 +637,11 @@ // Floating point compares. let SchedRW = [WriteFCom], Uses = [FPCW] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; + [(set FPSW, (trunc (X86any_fcmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>; + [(set FPSW, (trunc (X86any_fcmp RFP64:$lhs, RFP64:$rhs)))]>; def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>; + [(set FPSW, (trunc (X86any_fcmp RFP80:$lhs, RFP80:$rhs)))]>; } // SchedRW } // mayRaiseFPException = 1 @@ -649,13 +649,13 @@ // CC = ST(0) cmp ST(i) let Defs = [EFLAGS, FPCW], Uses = [FPCW] in { def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>, + [(set EFLAGS, (X86any_fcmp RFP32:$lhs, RFP32:$rhs))]>, Requires<[FPStackf32, HasCMov]>; def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>, + [(set EFLAGS, (X86any_fcmp RFP64:$lhs, RFP64:$rhs))]>, Requires<[FPStackf64, HasCMov]>; def UCOM_FpIr80: FpI_<(outs), (ins 
RFP80:$lhs, RFP80:$rhs), CompareFP, - [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>, + [(set EFLAGS, (X86any_fcmp RFP80:$lhs, RFP80:$rhs))]>, Requires<[HasCMov]>; } diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -169,7 +169,7 @@ def X86vshldq : SDNode<"X86ISD::VSHLDQ", X86vshiftimm>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", X86vshiftimm>; -def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; +def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP, [SDNPHasChain]>; def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; @@ -181,7 +181,7 @@ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPHasChain]>; def X86cmpmSAE : SDNode<"X86ISD::CMPM_SAE", X86CmpMaskCC>; def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>; def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -142,6 +142,7 @@ def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>; def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>; +def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86CmpTest, [SDNPHasChain]>; def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>; def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>; @@ -375,6 +376,9 @@ } def anymem : X86MemOperand<"printanymem">; +def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), + [(X86cmp node:$lhs, node:$rhs), + (X86strict_fcmp node:$lhs, node:$rhs)]>; // FIXME: Right now we allow any size during parsing, but we might want to // restrict to only unsized memory. 
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1851,9 +1851,9 @@ } let Defs = [EFLAGS] in { - defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; - defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, @@ -1873,9 +1873,9 @@ defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; } - defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, "ucomiss", WriteFCom>, PS; - defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, "ucomisd", WriteFCom>, PD; let Pattern = [] in { diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll @@ -0,0 +1,1539 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: Need to add the option -disable-strictnode-mutation after rebasing +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | 
FileCheck %s --check-prefixes=CHECK-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=X87 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse,+cmov -O3 | FileCheck %s --check-prefixes=X87-CMOV + +define i32 @f1(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f1: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnel %eax, %ecx +; CHECK-32-NEXT: cmovpl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f1: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnel %esi, %eax +; CHECK-64-NEXT: cmovpl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f1: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: jne .LBB0_3 +; X87-NEXT: # %bb.1: +; X87-NEXT: jp .LBB0_3 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: .LBB0_3: +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f1: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnel %eax, %ecx +; X87-CMOV-NEXT: cmovpl 
%eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f2(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f2: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmoval %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f2: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovbel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f2: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: ja .LBB1_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB1_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f2: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmoval %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f3(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f3: +; 
CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovael %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f3: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovbl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f3: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jae .LBB2_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB2_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f3: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovael %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f4(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f4: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmoval %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; 
CHECK-64-LABEL: f4: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm0, %xmm1 +; CHECK-64-NEXT: cmovbel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f4: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: ja .LBB3_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB3_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f4: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmoval %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f5(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f5: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovael %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f5: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm0, %xmm1 +; CHECK-64-NEXT: cmovbl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f5: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # 
kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jae .LBB4_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB4_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f5: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovael %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f6(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f6: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnel %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f6: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f6: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jne .LBB5_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB5_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f6: +; X87-CMOV: # 
%bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f7(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f7: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnpl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f7: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovpl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f7: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jnp .LBB6_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB6_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f7: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnpl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + 
%cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f8(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f8: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovel %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f8: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f8: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: je .LBB7_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB7_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f8: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f9(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f9: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f9: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm0, %xmm1 +; CHECK-64-NEXT: cmovael %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f9: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jb .LBB8_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB8_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f9: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f10(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f10: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbel %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f10: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, 
%eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm0, %xmm1 +; CHECK-64-NEXT: cmoval %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f10: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jbe .LBB9_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB9_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f10: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f11(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f11: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f11: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovael %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f11: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; 
X87-NEXT: jb .LBB10_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB10_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f11: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f12(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f12: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbel %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f12: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmoval %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f12: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jbe .LBB11_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB11_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f12: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; 
X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f13(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f13: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnel %eax, %ecx +; CHECK-32-NEXT: cmovpl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f13: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %esi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnel %edi, %eax +; CHECK-64-NEXT: cmovpl %edi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f13: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: jne .LBB12_3 +; X87-NEXT: # %bb.1: +; X87-NEXT: jp .LBB12_3 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: .LBB12_3: +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f13: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnel %eax, %ecx +; X87-CMOV-NEXT: cmovpl %eax, %ecx 
+; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f14(i32 %a, i32 %b, float %f1, float %f2) #0 { +; CHECK-32-LABEL: f14: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-32-NEXT: {{v?}}ucomiss {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovpl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f14: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnpl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f14: +; X87: # %bb.0: +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jp .LBB13_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB13_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f14: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovpl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f32( + float %f1, float %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f15(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f15: +; 
CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnel %eax, %ecx +; CHECK-32-NEXT: cmovpl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f15: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnel %esi, %eax +; CHECK-64-NEXT: cmovpl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f15: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: jne .LBB14_3 +; X87-NEXT: # %bb.1: +; X87-NEXT: jp .LBB14_3 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: .LBB14_3: +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f15: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnel %eax, %ecx +; X87-CMOV-NEXT: cmovpl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f16(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f16: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; 
CHECK-32-NEXT: cmoval %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f16: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovbel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f16: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: ja .LBB15_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB15_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f16: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmoval %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f17(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f17: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovael %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f17: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovbl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f17: +; X87: # %bb.0: +; X87-NEXT: fldl 
{{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jae .LBB16_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB16_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f17: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovael %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f18(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f18: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmoval %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f18: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm0, %xmm1 +; CHECK-64-NEXT: cmovbel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f18: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: ja .LBB17_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB17_1: +; X87-NEXT: leal 
{{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f18: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmoval %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f19(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f19: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovael %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f19: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm0, %xmm1 +; CHECK-64-NEXT: cmovbl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f19: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jae .LBB18_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB18_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f19: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal 
{{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovael %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f20(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f20: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnel %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f20: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f20: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jne .LBB19_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB19_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f20: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f21(i32 %a, i32 
%b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f21: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnpl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f21: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovpl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f21: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jnp .LBB20_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB20_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f21: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnpl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f22(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f22: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovel %eax, %ecx +; CHECK-32-NEXT: movl 
(%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f22: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnel %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f22: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: je .LBB21_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB21_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f22: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f23(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f23: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f23: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm0, %xmm1 +; CHECK-64-NEXT: cmovael %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f23: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: 
fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jb .LBB22_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB22_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f23: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f24(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f24: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbel %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f24: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm0, %xmm1 +; CHECK-64-NEXT: cmoval %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f24: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jbe .LBB23_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB23_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: 
retl +; +; X87-CMOV-LABEL: f24: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f25(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f25: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f25: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovael %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f25: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jb .LBB24_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB24_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f25: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbl %eax, %ecx +; X87-CMOV-NEXT: 
movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f26(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f26: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovbel %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f26: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmoval %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f26: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jbe .LBB25_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB25_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f26: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovbel %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f27(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f27: +; CHECK-32: # %bb.0: 
+; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovnel %eax, %ecx +; CHECK-32-NEXT: cmovpl %eax, %ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f27: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %esi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnel %edi, %eax +; CHECK-64-NEXT: cmovpl %edi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f27: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: jne .LBB26_3 +; X87-NEXT: # %bb.1: +; X87-NEXT: jp .LBB26_3 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: .LBB26_3: +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f27: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovnel %eax, %ecx +; X87-CMOV-NEXT: cmovpl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f28(i32 %a, i32 %b, double %f1, double %f2) #0 { +; CHECK-32-LABEL: f28: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: {{v?}}movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-32-NEXT: {{v?}}ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: cmovpl %eax, 
%ecx +; CHECK-32-NEXT: movl (%ecx), %eax +; CHECK-32-NEXT: retl +; +; CHECK-64-LABEL: f28: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: movl %edi, %eax +; CHECK-64-NEXT: {{v?}}ucomisd %xmm1, %xmm0 +; CHECK-64-NEXT: cmovnpl %esi, %eax +; CHECK-64-NEXT: retq +; +; X87-LABEL: f28: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fucompp +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: jp .LBB27_1 +; X87-NEXT: # %bb.2: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; X87-NEXT: .LBB27_1: +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl (%eax), %eax +; X87-NEXT: retl +; +; X87-CMOV-LABEL: f28: +; X87-CMOV: # %bb.0: +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) +; X87-CMOV-NEXT: fucompi %st(1), %st +; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X87-CMOV-NEXT: cmovpl %eax, %ecx +; X87-CMOV-NEXT: movl (%ecx), %eax +; X87-CMOV-NEXT: retl + %cond = call i1 @llvm.experimental.constrained.fcmp.f64( + double %f1, double %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp128-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp128-strict-scalar-cmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp128-strict-scalar-cmp.ll @@ -0,0 +1,119 @@ +; FIXME: This test should be updated after rebasing. 
+; RUN: llc < %s -mtriple=i686-unknown-unknown -O3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O3 + +define i32 @f1(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f2(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f3(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f4(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f5(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f6(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f7(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f8(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 
@llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f9(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f10(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f11(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f12(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f13(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f14(i32 %a, i32 %b, fp128 %f1, fp128 %f2) #0 { + %cond = call i1 @llvm.experimental.constrained.fcmp.f128( + fp128 %f1, fp128 %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll @@ -0,0 +1,499 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87-64 + +define i32 @f1(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f1: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: jne .LBB0_3 +; X87-32-NEXT: # %bb.1: +; X87-32-NEXT: jp .LBB0_3 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: .LBB0_3: +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f1: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovnel %esi, %eax +; X87-64-NEXT: cmovpl %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f2(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f2: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: ja .LBB1_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB1_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; 
X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f2: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovbel %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f3(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f3: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jae .LBB2_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB2_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f3: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovbl %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f4(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f4: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: ja .LBB3_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; 
X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB3_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f4: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovbel %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f5(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f5: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jae .LBB4_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB4_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f5: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovbl %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f6(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f6: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed 
$ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jne .LBB5_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB5_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f6: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovel %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f7(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f7: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jnp .LBB6_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB6_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f7: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovpl %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f8(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f8: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: 
fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: je .LBB7_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB7_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f8: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovnel %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f9(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f9: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jb .LBB8_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB8_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f9: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovael %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f10(i32 %a, i32 %b, 
x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f10: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jbe .LBB9_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB9_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f10: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmoval %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f11(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f11: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jb .LBB10_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB10_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f11: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovael %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata 
!"ult", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f12(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f12: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jbe .LBB11_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB11_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f12: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmoval %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f13(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f13: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: jne .LBB12_3 +; X87-32-NEXT: # %bb.1: +; X87-32-NEXT: jp .LBB12_3 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: .LBB12_3: +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f13: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %esi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovnel 
%edi, %eax +; X87-64-NEXT: cmovpl %edi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +define i32 @f14(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { +; X87-32-LABEL: f14: +; X87-32: # %bb.0: +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fldt {{[0-9]+}}(%esp) +; X87-32-NEXT: fucompp +; X87-32-NEXT: fnstsw %ax +; X87-32-NEXT: # kill: def $ah killed $ah killed $ax +; X87-32-NEXT: sahf +; X87-32-NEXT: jp .LBB13_1 +; X87-32-NEXT: # %bb.2: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; X87-32-NEXT: .LBB13_1: +; X87-32-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-32-NEXT: movl (%eax), %eax +; X87-32-NEXT: retl +; +; X87-64-LABEL: f14: +; X87-64: # %bb.0: +; X87-64-NEXT: movl %edi, %eax +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) +; X87-64-NEXT: fucompi %st(1), %st +; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: cmovnpl %esi, %eax +; X87-64-NEXT: retq + %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( + x86_fp80 %f1, x86_fp80 %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +attributes #0 = { strictfp } + +declare i1 @llvm.experimental.constrained.fcmp.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/vec-strict-128-cmp.ll b/llvm/test/CodeGen/X86/vec-strict-128-cmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-128-cmp.ll @@ -0,0 +1,833 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: Need to add option -disable-strictnode-mutation after rebasing +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | 
FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-64 + +define <4 x i32> @f1(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f1: +; SSE: # %bb.0: +; SSE: cmpeqps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f1: +; AVX: # %bb.0: +; AVX: vcmpeqps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f1: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeqps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f1: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeqps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f2(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f2: +; SSE: # %bb.0: +; SSE: cmpltps {{.*}}, %xmm3 +; SSE-NEXT: andps %xmm3, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm3 +; SSE-NEXT: orps %xmm3, %xmm0 +; +; AVX-LABEL: f2: +; AVX: # %bb.0: +; AVX: vcmpltps {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f2: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgtps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f2: +; AVX512-64: 
# %bb.0: +; AVX512-64-NEXT: vcmpltps %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f3(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f3: +; SSE: # %bb.0: +; SSE: cmpleps {{.*}}, %xmm3 +; SSE-NEXT: andps %xmm3, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm3 +; SSE-NEXT: orps %xmm3, %xmm0 +; +; AVX-LABEL: f3: +; AVX: # %bb.0: +; AVX: vcmpleps {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f3: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgeps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f3: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpleps %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f4(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f4: +; SSE: # %bb.0: +; SSE: cmpltps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f4: +; AVX: # %bb.0: +; AVX: vcmpltps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f4: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpltps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f4: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> 
@llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f5(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f5: +; SSE: # %bb.0: +; SSE: cmpleps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f5: +; AVX: # %bb.0: +; AVX: vcmpleps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f5: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpleps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f5: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpleps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f6(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f6: +; SSE: # %bb.0: +; SSE: cmpneqps %xmm3, %xmm4 +; SSE-NEXT: cmpordps %xmm3, %xmm2 +; SSE-NEXT: andps %xmm4, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f6: +; AVX: # %bb.0: +; AVX: vcmpneq_oqps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f6: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneq_oqps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f6: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneq_oqps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata 
!"one", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f7(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f7: +; SSE: # %bb.0: +; SSE: cmpordps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f7: +; AVX: # %bb.0: +; AVX: vcmpordps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f7: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpordps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f7: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpordps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f8(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f8: +; SSE: # %bb.0: +; SSE: cmpeqps %xmm3, %xmm4 +; SSE-NEXT: cmpunordps %xmm3, %xmm2 +; SSE-NEXT: orps %xmm4, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f8: +; AVX: # %bb.0: +; AVX: vcmpeq_uqps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f8: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeq_uqps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f8: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeq_uqps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 
x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f9(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f9: +; SSE: # %bb.0: +; SSE: cmpnleps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f9: +; AVX: # %bb.0: +; AVX: vcmpnleps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f9: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnleps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f9: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnleps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f10(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f10: +; SSE: # %bb.0: +; SSE: cmpnltps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f10: +; AVX: # %bb.0: +; AVX: vcmpnltps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f10: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnltps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f10: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f11(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f11: +; SSE: # 
%bb.0: +; SSE: cmpnleps {{.*}}, %xmm3 +; SSE-NEXT: andps %xmm3, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm3 +; SSE-NEXT: orps %xmm3, %xmm0 +; +; AVX-LABEL: f11: +; AVX: # %bb.0: +; AVX: vcmpnleps {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f11: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngeps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f11: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnleps %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f12(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f12: +; SSE: # %bb.0: +; SSE: cmpnltps {{.*}}, %xmm3 +; SSE-NEXT: andps %xmm3, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm3 +; SSE-NEXT: orps %xmm3, %xmm0 +; +; AVX-LABEL: f12: +; AVX: # %bb.0: +; AVX: vcmpnltps {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f12: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngtps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f12: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltps %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f13(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f13: +; SSE: # %bb.0: +; SSE: cmpneqps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: 
f13: +; AVX: # %bb.0: +; AVX: vcmpneqps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f13: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneqps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f13: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneqps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <4 x i32> @f14(<4 x i32> %a, <4 x i32> %b, <4 x float> %f1, <4 x float> %f2) #0 { +; SSE-LABEL: f14: +; SSE: # %bb.0: +; SSE: cmpunordps {{.*}}, %xmm2 +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm2, %xmm0 +; +; AVX-LABEL: f14: +; AVX: # %bb.0: +; AVX: vcmpunordps {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f14: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpunordps 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f14: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpunordps %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <4 x float> %f1, <4 x float> %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %res +} + +define <2 x i64> @f15(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f15: +; SSE: # %bb.0: +; SSE: cmpeqpd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f15: +; AVX: # %bb.0: +; AVX: vcmpeqpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f15: +; 
AVX512-32: # %bb.0: +; AVX512-32: vcmpeqpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f15: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f16(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f16: +; SSE: # %bb.0: +; SSE: cmpltpd {{.*}}, %xmm3 +; SSE-NEXT: andpd %xmm3, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm3 +; SSE-NEXT: orpd %xmm3, %xmm0 +; +; AVX-LABEL: f16: +; AVX: # %bb.0: +; AVX: vcmpltpd {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f16: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgtpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f16: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltpd %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f17(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f17: +; SSE: # %bb.0: +; SSE: cmplepd {{.*}}, %xmm3 +; SSE-NEXT: andpd %xmm3, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm3 +; SSE-NEXT: orpd %xmm3, %xmm0 +; +; AVX-LABEL: f17: +; AVX: # %bb.0: +; AVX: vcmplepd {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f17: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgepd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f17: +; 
AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmplepd %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f18(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f18: +; SSE: # %bb.0: +; SSE: cmpltpd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f18: +; AVX: # %bb.0: +; AVX: vcmpltpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f18: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpltpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f18: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f19(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f19: +; SSE: # %bb.0: +; SSE: cmplepd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f19: +; AVX: # %bb.0: +; AVX: vcmplepd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f19: +; AVX512-32: # %bb.0: +; AVX512-32: vcmplepd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f19: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmplepd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> 
@llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f20(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f20: +; SSE: # %bb.0: +; SSE: cmpneqpd %xmm3, %xmm4 +; SSE-NEXT: cmpordpd %xmm3, %xmm2 +; SSE-NEXT: andpd %xmm4, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f20: +; AVX: # %bb.0: +; AVX: vcmpneq_oqpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f20: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneq_oqpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f20: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneq_oqpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f21(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f21: +; SSE: # %bb.0: +; SSE: cmpordpd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f21: +; AVX: # %bb.0: +; AVX: vcmpordpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f21: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpordpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f21: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpordpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x 
double> %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f22(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f22: +; SSE: # %bb.0: +; SSE: cmpeqpd %xmm3, %xmm4 +; SSE-NEXT: cmpunordpd %xmm3, %xmm2 +; SSE-NEXT: orpd %xmm4, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f22: +; AVX: # %bb.0: +; AVX: vcmpeq_uqpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f22: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeq_uqpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f22: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeq_uqpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f23(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f23: +; SSE: # %bb.0: +; SSE: cmpnlepd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f23: +; AVX: # %bb.0: +; AVX: vcmpnlepd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f23: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnlepd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f23: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnlepd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = 
select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f24(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f24: +; SSE: # %bb.0: +; SSE: cmpnltpd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f24: +; AVX: # %bb.0: +; AVX: vcmpnltpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f24: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnltpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f24: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f25(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f25: +; SSE: # %bb.0: +; SSE: cmpnlepd {{.*}}, %xmm3 +; SSE-NEXT: andpd %xmm3, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm3 +; SSE-NEXT: orpd %xmm3, %xmm0 +; +; AVX-LABEL: f25: +; AVX: # %bb.0: +; AVX: vcmpnlepd {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f25: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngepd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f25: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnlepd %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f26(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, 
<2 x double> %f2) #0 { +; SSE-LABEL: f26: +; SSE: # %bb.0: +; SSE: cmpnltpd {{.*}}, %xmm3 +; SSE-NEXT: andpd %xmm3, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm3 +; SSE-NEXT: orpd %xmm3, %xmm0 +; +; AVX-LABEL: f26: +; AVX: # %bb.0: +; AVX: vcmpnltpd {{.*}}, %xmm3, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f26: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngtpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f26: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltpd %xmm2, %xmm3, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f27(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f27: +; SSE: # %bb.0: +; SSE: cmpneqpd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f27: +; AVX: # %bb.0: +; AVX: vcmpneqpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f27: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneqpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f27: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneqpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +define <2 x i64> @f28(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, <2 x double> %f2) #0 { +; SSE-LABEL: f28: +; SSE: # %bb.0: +; SSE: cmpunordpd {{.*}}, %xmm2 +; SSE-NEXT: andpd %xmm2, %xmm0 +; SSE-NEXT: andnpd 
%xmm1, %xmm2 +; SSE-NEXT: orpd %xmm2, %xmm0 +; +; AVX-LABEL: f28: +; AVX: # %bb.0: +; AVX: vcmpunordpd {{.*}}, %xmm2, %xmm2 +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; +; AVX512-32-LABEL: f28: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpunordpd 8(%ebp), %xmm2, %k1 +; AVX512-32-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; +; AVX512-64-LABEL: f28: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpunordpd %xmm3, %xmm2, %k1 +; AVX512-64-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} + %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <2 x double> %f1, <2 x double> %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %res +} + +attributes #0 = { strictfp } + +declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double>, <2 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/vec-strict-256-cmp.ll b/llvm/test/CodeGen/X86/vec-strict-256-cmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-256-cmp.ll @@ -0,0 +1,627 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: Need to add the -disable-strictnode-mutation option after rebasing +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-64 + +define <8 x i32> @f1(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f1: +; AVX: # %bb.0: +; AVX: vcmpeqps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, 
%ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f1: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeqps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f1: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeqps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f2(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f2: +; AVX: # %bb.0: +; AVX: vcmpltps {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f2: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgtps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f2: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltps %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f3(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f3: +; AVX: # %bb.0: +; AVX: vcmpleps {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f3: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgeps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f3: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpleps %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> 
%cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f4(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f4: +; AVX: # %bb.0: +; AVX: vcmpltps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f4: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpltps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f4: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f5(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f5: +; AVX: # %bb.0: +; AVX: vcmpleps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f5: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpleps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f5: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpleps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f6(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f6: +; AVX: # %bb.0: +; AVX: vcmpneq_oqps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f6: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneq_oqps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f6: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: 
vcmpneq_oqps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f7(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f7: +; AVX: # %bb.0: +; AVX: vcmpordps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f7: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpordps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f7: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpordps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f8(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f8: +; AVX: # %bb.0: +; AVX: vcmpeq_uqps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f8: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeq_uqps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f8: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeq_uqps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f9(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f9: +; AVX: # %bb.0: +; AVX: vcmpnleps {{.*}}, %ymm2, %ymm2 
+; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f9: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnleps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f9: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnleps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f10(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f10: +; AVX: # %bb.0: +; AVX: vcmpnltps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f10: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnltps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f10: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f11(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f11: +; AVX: # %bb.0: +; AVX: vcmpnleps {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f11: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngeps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f11: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnleps %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"ult", + metadata 
!"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f12(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f12: +; AVX: # %bb.0: +; AVX: vcmpnltps {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f12: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngtps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f12: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltps %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f13(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f13: +; AVX: # %bb.0: +; AVX: vcmpneqps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f13: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneqps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f13: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneqps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <8 x i32> @f14(<8 x i32> %a, <8 x i32> %b, <8 x float> %f1, <8 x float> %f2) #0 { +; AVX-LABEL: f14: +; AVX: # %bb.0: +; AVX: vcmpunordps {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f14: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpunordps 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; +; 
AVX512-64-LABEL: f14: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpunordps %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v4f32( + <8 x float> %f1, <8 x float> %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %res +} + +define <4 x i64> @f15(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f15: +; AVX: # %bb.0: +; AVX: vcmpeqpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f15: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeqpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f15: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f16(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f16: +; AVX: # %bb.0: +; AVX: vcmpltpd {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f16: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgtpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f16: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltpd %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f17(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; 
AVX-LABEL: f17: +; AVX: # %bb.0: +; AVX: vcmplepd {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f17: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgepd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f17: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmplepd %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f18(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f18: +; AVX: # %bb.0: +; AVX: vcmpltpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f18: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpltpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f18: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f19(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f19: +; AVX: # %bb.0: +; AVX: vcmplepd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f19: +; AVX512-32: # %bb.0: +; AVX512-32: vcmplepd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f19: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmplepd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> 
@llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f20(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f20: +; AVX: # %bb.0: +; AVX: vcmpneq_oqpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f20: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneq_oqpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f20: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneq_oqpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f21(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f21: +; AVX: # %bb.0: +; AVX: vcmpordpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f21: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpordpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f21: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpordpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f22(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f22: +; AVX: # %bb.0: +; AVX: vcmpeq_uqpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f22: +; 
AVX512-32: # %bb.0: +; AVX512-32: vcmpeq_uqpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f22: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeq_uqpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f23(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f23: +; AVX: # %bb.0: +; AVX: vcmpnlepd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f23: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnlepd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f23: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnlepd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f24(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f24: +; AVX: # %bb.0: +; AVX: vcmpnltpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f24: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnltpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f24: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, 
<4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f25(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f25: +; AVX: # %bb.0: +; AVX: vcmpnlepd {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f25: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngepd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f25: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnlepd %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f26(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f26: +; AVX: # %bb.0: +; AVX: vcmpnltpd {{.*}}, %ymm3, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f26: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngtpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f26: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltpd %ymm2, %ymm3, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f27(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f27: +; AVX: # %bb.0: +; AVX: vcmpneqpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f27: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneqpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f27: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: 
vcmpneqpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +define <4 x i64> @f28(<4 x i64> %a, <4 x i64> %b, <4 x double> %f1, <4 x double> %f2) #0 { +; AVX-LABEL: f28: +; AVX: # %bb.0: +; AVX: vcmpunordpd {{.*}}, %ymm2, %ymm2 +; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; +; AVX512-32-LABEL: f28: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpunordpd 8(%ebp), %ymm2, %k1 +; AVX512-32-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; +; AVX512-64-LABEL: f28: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpunordpd %ymm3, %ymm2, %k1 +; AVX512-64-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} + %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v2f64( + <4 x double> %f1, <4 x double> %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select <4 x i1> %cond, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %res +} + +attributes #0 = { strictfp } + +declare <8 x i1> @llvm.experimental.constrained.fcmp.v4f32(<8 x float>, <8 x float>, metadata, metadata) +declare <4 x i1> @llvm.experimental.constrained.fcmp.v2f64(<4 x double>, <4 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/vec-strict-512-cmp.ll b/llvm/test/CodeGen/X86/vec-strict-512-cmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-512-cmp.ll @@ -0,0 +1,485 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: Need to add option -disable-strictnode-mutation after rebase +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512-64 + +define <16 x i32> @f1(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16
x float> %f2) #0 { +; AVX512-32-LABEL: f1: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeqps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f1: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeqps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f2(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f2: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgtps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f2: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltps %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f3(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f3: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgeps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f3: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpleps %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f4(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f4: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpltps
8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f4: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f5(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f5: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpleps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f5: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpleps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f6(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f6: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneq_oqps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f6: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneq_oqps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f7(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f7: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpordps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +;
AVX512-64-LABEL: f7: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpordps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f8(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f8: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeq_uqps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f8: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeq_uqps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f9(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f9: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnleps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f9: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnleps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f10(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f10: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnltps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f10: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltps
%zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f11(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f11: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngeps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f11: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnleps %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f12(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f12: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngtps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f12: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltps %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f13(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f13: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneqps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f13: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneqps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +
%cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <16 x i32> @f14(<16 x i32> %a, <16 x i32> %b, <16 x float> %f1, <16 x float> %f2) #0 { +; AVX512-32-LABEL: f14: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpunordps 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f14: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpunordps %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32( + <16 x float> %f1, <16 x float> %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select <16 x i1> %cond, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %res +} + +define <8 x i64> @f15(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f15: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeqpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f15: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f16(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f16: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgtpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f16: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltpd %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8
x double> %f2, metadata !"ogt", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f17(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f17: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpgepd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f17: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmplepd %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"oge", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f18(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f18: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpltpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f18: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpltpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"olt", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f19(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f19: +; AVX512-32: # %bb.0: +; AVX512-32: vcmplepd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f19: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmplepd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"ole", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x
i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f20(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f20: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneq_oqpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f20: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneq_oqpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"one", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f21(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f21: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpordpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f21: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpordpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"ord", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f22(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f22: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpeq_uqpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f22: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpeq_uqpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"ueq", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f23(<8 x i64> %a, <8 x
i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f23: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnlepd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f23: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnlepd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"ugt", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f24(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f24: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpnltpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f24: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"uge", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f25(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f25: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpngepd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f25: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnlepd %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"ult", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f26(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f26: +; AVX512-32: # %bb.0: +;
AVX512-32: vcmpngtpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f26: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpnltpd %zmm2, %zmm3, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"ule", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f27(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f27: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpneqpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f27: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpneqpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"une", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +define <8 x i64> @f28(<8 x i64> %a, <8 x i64> %b, <8 x double> %f1, <8 x double> %f2) #0 { +; AVX512-32-LABEL: f28: +; AVX512-32: # %bb.0: +; AVX512-32: vcmpunordpd 8(%ebp), %zmm2, %k1 +; AVX512-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; +; AVX512-64-LABEL: f28: +; AVX512-64: # %bb.0: +; AVX512-64-NEXT: vcmpunordpd %zmm3, %zmm2, %k1 +; AVX512-64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} + %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64( + <8 x double> %f1, <8 x double> %f2, metadata !"uno", + metadata !"fpexcept.strict") #0 + %res = select <8 x i1> %cond, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %res +} + +attributes #0 = { strictfp } + +declare <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double>, <8 x double>, metadata,
metadata)