Index: ../lib/Target/X86/X86ISelLowering.cpp =================================================================== --- ../lib/Target/X86/X86ISelLowering.cpp +++ ../lib/Target/X86/X86ISelLowering.cpp @@ -17469,30 +17469,66 @@ ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG); - assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!"); - SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); - SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, dl, MVT::i8), Cond); + SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); + SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS); + SDValue SetCC; + switch (CC) { + case ISD::SETEQ: { // ZF,PF,CF <- 100 + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_E, dl, MVT::i8), Comi); + SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_NP, dl, MVT::i8), + Comi); + SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP); + break; + } + case ISD::SETNE: { + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_NE, dl, MVT::i8), Comi); + SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_P, dl, MVT::i8), + Comi); + SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP); + break; + } + case ISD::SETGT: // ZF,PF,CF <- 000 (CF = 0 and ZF = 0) + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_A, dl, MVT::i8), Comi); + break; + case ISD::SETLT: { // ZF,PF,CF <- 001 (CF = 1 and ZF = 0) + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_A, dl, MVT::i8), InvComi); + break; + } + case ISD::SETGE: // CF = 0 + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_AE, dl, MVT::i8), Comi); + break; + case ISD::SETLE: // (ZF=1 or CF=1) and PF=0 + SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_AE, dl, MVT::i8), InvComi); + break; + default: + llvm_unreachable("Unexpected illegal condition!"); + } return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case COMI_RM: { // Comparison intrinsics with Sae SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - SDValue CC = Op.getOperand(3); + unsigned CondVal = cast(Op.getOperand(3))->getZExtValue(); SDValue Sae = Op.getOperand(4); - auto ComiType = TranslateX86ConstCondToX86CC(CC); - // choose between ordered and unordered (comi/ucomi) - unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1; - SDValue Cond; - if (cast(Sae)->getZExtValue() != - X86::STATIC_ROUNDING::CUR_DIRECTION) - Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae); + + SDValue FCmp; + if (cast(Sae)->getZExtValue() == + X86::STATIC_ROUNDING::CUR_DIRECTION) + FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS, + DAG.getConstant(CondVal, dl, MVT::i8)); else - Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS); - SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond); - return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS, + DAG.getConstant(CondVal, dl, MVT::i8), Sae); + // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg" + return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp); } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Index: ../lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- ../lib/Target/X86/X86IntrinsicsInfo.h +++ ../lib/Target/X86/X86IntrinsicsInfo.h @@ -2331,96 +2331,6 @@ std::end(IntrinsicsWithChain)) && "Intrinsic data tables should have unique entries"); } - -// X86 specific compare constants. -// They must be kept in synch with avxintrin.h -#define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ -#define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ -#define _X86_CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ -#define _X86_CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ -#define _X86_CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ -#define _X86_CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ -#define _X86_CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ -#define _X86_CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */ -#define _X86_CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ -#define _X86_CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */ -#define _X86_CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ -#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ -#define _X86_CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ -#define _X86_CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ -#define _X86_CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ -#define _X86_CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ -#define _X86_CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ -#define _X86_CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ -#define _X86_CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ -#define _X86_CMP_UNORD_S 0x13 /* Unordered (signaling) */ -#define _X86_CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ -#define _X86_CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ -#define _X86_CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */ -#define _X86_CMP_ORD_S 0x17 /* Ordered (signaling) */ -#define _X86_CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ -#define _X86_CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */ -#define _X86_CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ -#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ -#define _X86_CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ -#define _X86_CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ -#define _X86_CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ -#define _X86_CMP_TRUE_US 0x1f /* True (unordered, signaling) */ - -/* -* Get comparison modifier from _mm_comi_round_sd/ss intrinsic -* Return tuple -*/ -static std::tuple TranslateX86ConstCondToX86CC(SDValue &imm) { - ConstantSDNode *CImm = dyn_cast(imm); - unsigned IntImm = CImm->getZExtValue(); - // On a floating point condition, the flags are set as follows: - // ZF PF CF op - // 0 | 0 | 0 | X > Y - // 0 | 0 | 1 | X < Y - // 1 | 0 | 0 | X == Y - // 1 | 1 | 1 | unordered - switch (IntImm) { - default: llvm_unreachable("Invalid floating point compare value for Comi!"); - case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling) - case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling) - return std::make_tuple(true, X86::COND_E); - case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling) - case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling) - return std::make_tuple(false , X86::COND_E); - case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling) - case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling) - return std::make_tuple(true, X86::COND_B); - case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling) - case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling) - return std::make_tuple(false , X86::COND_B); - case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling) - case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling) - return std::make_tuple(true, X86::COND_BE); - case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling) - case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling) - return std::make_tuple(false, X86::COND_BE); - case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling) - case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling) - return std::make_tuple(true, X86::COND_A); - case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered,signaling) - case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling) - return std::make_tuple(false, X86::COND_A); - case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling) - case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling) - return std::make_tuple(true, X86::COND_AE); - case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling) - case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling) - return std::make_tuple(false, X86::COND_AE); - case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling) - case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling) - return std::make_tuple(true, X86::COND_NE); - case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling) - case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling) - return std::make_tuple(false, X86::COND_NE); - } -} - } // End llvm namespace #endif Index: ../test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- ../test/CodeGen/X86/avx-intrinsics-x86.ll +++ ../test/CodeGen/X86/avx-intrinsics-x86.ll @@ -104,8 +104,10 @@ ; CHECK-LABEL: test_x86_sse2_comieq_sd: ; CHECK: ## BB#0: ; CHECK-NEXT: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setnp %al +; CHECK-NEXT: sete %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -142,8 +144,8 @@ define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_sse2_comile_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: setbe %al +; CHECK-NEXT: vcomisd %xmm0, %xmm1 +; CHECK-NEXT: setae %al ; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -155,9 +157,9 @@ define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_sse2_comilt_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vcomisd %xmm0, %xmm1 +; CHECK-NEXT: seta %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -169,8 +171,10 @@ ; CHECK-LABEL: test_x86_sse2_comineq_sd: ; CHECK: ## BB#0: ; CHECK-NEXT: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setp %al +; CHECK-NEXT: setne %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -912,8 +916,10 @@ ; CHECK-LABEL: test_x86_sse2_ucomieq_sd: ; CHECK: ## BB#0: ; CHECK-NEXT: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setnp %al +; CHECK-NEXT: sete %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -950,8 +956,8 @@ define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_sse2_ucomile_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: setbe %al +; CHECK-NEXT: vucomisd %xmm0, %xmm1 +; CHECK-NEXT: setae %al ; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -963,9 +969,9 @@ define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_sse2_ucomilt_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vucomisd %xmm0, %xmm1 +; CHECK-NEXT: seta %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -977,8 +983,10 @@ ; CHECK-LABEL: test_x86_sse2_ucomineq_sd: ; CHECK: ## BB#0: ; CHECK-NEXT: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setp %al +; CHECK-NEXT: setne %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1699,8 +1707,10 @@ ; CHECK-LABEL: test_x86_sse_comieq_ss: ; CHECK: ## BB#0: ; CHECK-NEXT: vcomiss %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setnp %al +; CHECK-NEXT: sete %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -1737,8 +1747,8 @@ define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test_x86_sse_comile_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomiss %xmm1, %xmm0 -; CHECK-NEXT: setbe %al +; CHECK-NEXT: vcomiss %xmm0, %xmm1 +; CHECK-NEXT: setae %al ; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -1750,9 +1760,9 @@ define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test_x86_sse_comilt_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomiss %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vcomiss %xmm0, %xmm1 +; CHECK-NEXT: seta %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -1764,8 +1774,10 @@ ; CHECK-LABEL: test_x86_sse_comineq_ss: ; CHECK: ## BB#0: ; CHECK-NEXT: vcomiss %xmm1, %xmm0 -; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setp %al +; CHECK-NEXT: setne %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2003,8 +2015,10 @@ ; CHECK-LABEL: test_x86_sse_ucomieq_ss: ; CHECK: ## BB#0: ; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setnp %al +; CHECK-NEXT: sete %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2041,8 +2055,8 @@ define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test_x86_sse_ucomile_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: setbe %al +; CHECK-NEXT: vucomiss %xmm0, %xmm1 +; CHECK-NEXT: setae %al ; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2054,9 +2068,9 @@ define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test_x86_sse_ucomilt_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vucomiss %xmm0, %xmm1 +; CHECK-NEXT: seta %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2068,8 +2082,10 @@ ; CHECK-LABEL: test_x86_sse_ucomineq_ss: ; CHECK: ## BB#0: ; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: setp %al +; CHECK-NEXT: setne %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res Index: ../test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- ../test/CodeGen/X86/avx512-intrinsics.ll +++ ../test/CodeGen/X86/avx512-intrinsics.ll @@ -6307,9 +6307,8 @@ define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: vcmpeqsd {sae}, %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) ret i32 %res @@ -6318,9 +6317,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) ret i32 %res @@ -6329,9 +6327,8 @@ define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_comi_sd_eq: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: vcmpeqsd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) ret i32 %res @@ -6340,9 +6337,8 @@ define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: vcmpeq_uqsd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) ret i32 %res @@ -6351,9 +6347,8 @@ define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vcmpltsd {sae}, %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) ret i32 %res @@ -6362,9 +6357,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vcmpngesd {sae}, %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8) ret i32 %res @@ -6373,9 +6367,8 @@ define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_comi_sd_lt: ; CHECK: ## BB#0: -; CHECK-NEXT: vcomisd %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vcmpltsd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) ret i32 %res @@ -6384,9 +6377,8 @@ define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomisd %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vcmpngesd %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) ret i32 %res @@ -6397,9 +6389,8 @@ define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt: ; CHECK: ## BB#0: -; CHECK-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: vcmpngess %xmm1, %xmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) ret i32 %res @@ -6855,9 +6846,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxbd %xmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxbd %xmm0, %zmm0 +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6875,9 +6866,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxbq %xmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxbq %xmm0, %zmm0 +; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6895,9 +6886,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxdq %ymm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxdq %ymm0, %zmm0 +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6915,9 +6906,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxwd %ymm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxwd %ymm0, %zmm0 +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6935,9 +6926,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxwq %xmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxwq %xmm0, %zmm0 +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq Index: ../test/CodeGen/X86/sse-intrinsics-x86.ll =================================================================== --- ../test/CodeGen/X86/sse-intrinsics-x86.ll +++ ../test/CodeGen/X86/sse-intrinsics-x86.ll @@ -28,6 +28,7 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: comiss ; CHECK: sete + ; CHECK: and ; CHECK: movzbl %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -57,7 +58,7 @@ define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: comiss - ; CHECK: setbe + ; CHECK: setae ; CHECK: movzbl %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -67,7 +68,7 @@ define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: comiss - ; CHECK: sbb + ; CHECK: seta %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -251,9 +252,14 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: sete - ; CHECK: movzbl +; CHECK-LABEL: test_x86_sse_ucomieq_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: setnp %al +; CHECK-NEXT: sete %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -271,7 +277,7 @@ define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss + ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK: seta ; CHECK: movzbl %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -281,9 +287,12 @@ define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setbe - ; CHECK: movzbl +; CHECK-LABEL: test_x86_sse_ucomile_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: setae %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -291,8 +300,9 @@ define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: sbbl + ; CHECK: ucomiss %xmm0, %xmm1 + ; CHECK: seta + ; CHECK: movzbl %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -300,9 +310,14 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setne - ; CHECK: movzbl +; CHECK-LABEL: test_x86_sse_ucomineq_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: setp %al +; CHECK-NEXT: setne %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } Index: ../test/CodeGen/X86/sse2-intrinsics-x86.ll =================================================================== --- ../test/CodeGen/X86/sse2-intrinsics-x86.ll +++ ../test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -57,7 +57,7 @@ define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: comisd - ; CHECK: setbe + ; CHECK: setae ; CHECK: movzbl %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -67,8 +67,8 @@ define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: comisd - ; CHECK: sbbl %eax, %eax - ; CHECK: andl $1, %eax + ; CHECK: seta + ; CHECK: movzbl %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -77,6 +77,7 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: comisd + ; CHECK: setp ; CHECK: setne ; CHECK: movzbl %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -654,7 +655,7 @@ define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: ucomisd - ; CHECK: setbe + ; CHECK: setae ; CHECK: movzbl %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -664,7 +665,7 @@ define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: ucomisd - ; CHECK: sbbl + ; CHECK: seta %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res }