Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34288,37 +34288,39 @@
     return SDValue();
 
   // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
-  MVT MaskVT;
+  MVT MaskSrcVT;
   if (64 == BitWidth || 32 == BitWidth)
-    MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
-                              MatchSizeInBits / BitWidth);
+    MaskSrcVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
+                                 MatchSizeInBits / BitWidth);
   else
-    MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
+    MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
 
-  APInt CompareBits;
+  SDLoc DL(Extract);
+  SDValue CmpC;
   ISD::CondCode CondCode;
   if (BinOp == ISD::OR) {
     // any_of -> MOVMSK != 0
-    CompareBits = APInt::getNullValue(32);
+    CmpC = DAG.getConstant(0, DL, MVT::i32);
     CondCode = ISD::CondCode::SETNE;
   } else {
     // all_of -> MOVMSK == ((1 << NumElts) - 1)
-    CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
+    uint64_t NumElts = MaskSrcVT.getVectorNumElements();
+    assert(NumElts <= 32 && "Not expecting more than 32 elements");
+    CmpC = DAG.getConstant((1ULL << NumElts) - 1, DL, MVT::i32);
     CondCode = ISD::CondCode::SETEQ;
   }
 
-  // Perform the select as i32/i64 and then truncate to avoid partial register
-  // stalls.
-  unsigned ResWidth = std::max(BitWidth, 32u);
-  EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
-  SDLoc DL(Extract);
-  SDValue Zero = DAG.getConstant(0, DL, ResVT);
-  SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
-  SDValue Res = DAG.getBitcast(MaskVT, Match);
-  Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
-  Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
-                        Ones, Zero, CondCode);
-  return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
+  // The setcc produces an i8 of 0/1, so extend that to the result width and
+  // negate to get the final 0/-1 mask value.
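+  // For example, an all_of reduction of a v4i32 compare becomes
+  //   movmsk; cmp $0xf; sete (i8 0/1); zext; 0 - x
+  // which shows up in the tests below as movmsk+cmp+sete+neg sequences.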
+  SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
+  SDValue Movmsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BitcastLogicOp);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT SetccVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+                                       MVT::i32);
+  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
+  SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
+  SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
+  return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
 }
 
 static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
Index: llvm/test/CodeGen/X86/vector-compare-all_of.ll
===================================================================
--- llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -50,11 +50,11 @@
 ; AVX-LABEL: test_v4f64_sext:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskpd %ymm0, %eax
-; AVX-NEXT: xorl %ecx, %ecx
-; AVX-NEXT: cmpl $15, %eax
-; AVX-NEXT: movq $-1, %rax
-; AVX-NEXT: cmovneq %rcx, %rax
+; AVX-NEXT: vmovmskpd %ymm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl $15, %ecx
+; AVX-NEXT: sete %al
+; AVX-NEXT: negq %rax
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
 ;
@@ -87,8 +87,9 @@
 ; SSE-NEXT: movmskps %xmm2, %eax
 ; SSE-NEXT: xorl %ecx, %ecx
 ; SSE-NEXT: cmpl $15, %eax
-; SSE-NEXT: movq $-1, %rax
-; SSE-NEXT: cmovneq %rcx, %rax
+; SSE-NEXT: sete %cl
+; SSE-NEXT: negl %ecx
+; SSE-NEXT: movslq %ecx, %rax
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_v4f64_legal_sext:
@@ -99,8 +100,9 @@
 ; AVX-NEXT: vmovmskps %xmm0, %eax
 ; AVX-NEXT: xorl %ecx, %ecx
 ; AVX-NEXT: cmpl $15, %eax
-; AVX-NEXT: movq $-1, %rax
-; AVX-NEXT: cmovneq %rcx, %rax
+; AVX-NEXT: sete %cl
+; AVX-NEXT: negl %ecx
+; AVX-NEXT: movslq %ecx, %rax
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
 ;
@@ -132,21 +134,21 @@
 ; SSE-LABEL: test_v4f32_sext:
 ; SSE: # %bb.0:
 ; SSE-NEXT: cmpltps %xmm0, %xmm1
-; SSE-NEXT: movmskps %xmm1, %eax
-; SSE-NEXT: xorl %ecx, %ecx
-; SSE-NEXT: cmpl $15, %eax
-; SSE-NEXT: movl $-1, %eax
-; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: movmskps %xmm1, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl $15, %ecx
+; SSE-NEXT: sete %al
+; SSE-NEXT: negl %eax
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_v4f32_sext:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: xorl %ecx, %ecx
-; AVX-NEXT: cmpl $15, %eax
-; AVX-NEXT: movl $-1, %eax
-; AVX-NEXT: cmovnel %ecx, %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl $15, %ecx
+; AVX-NEXT: sete %al
+; AVX-NEXT: negl %eax
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: test_v4f32_sext:
@@ -174,21 +176,21 @@
 ; SSE-NEXT: cmpltps %xmm1, %xmm3
 ; SSE-NEXT: cmpltps %xmm0, %xmm2
 ; SSE-NEXT: andps %xmm3, %xmm2
-; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: xorl %ecx, %ecx
-; SSE-NEXT: cmpl $15, %eax
-; SSE-NEXT: movl $-1, %eax
-; SSE-NEXT: cmovnel %ecx, %eax
+; SSE-NEXT: movmskps %xmm2, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl $15, %ecx
+; SSE-NEXT: sete %al
+; SSE-NEXT: negl %eax
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_v8f32_sext:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskps %ymm0, %eax
-; AVX-NEXT: xorl %ecx, %ecx
-; AVX-NEXT: cmpl $255, %eax
-; AVX-NEXT: movl $-1, %eax
-; AVX-NEXT: cmovnel %ecx, %eax
+; AVX-NEXT: vmovmskps %ymm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl $255, %ecx
+; AVX-NEXT: sete %al
+; AVX-NEXT: negl %eax
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
 ;
@@ -222,11 +224,11 @@
 ;
SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: pmovmskb %xmm2, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: sete %al +; SSE-NEXT: negl %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f32_legal_sext: @@ -234,11 +236,11 @@ ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: xorl %ecx, %ecx -; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX-NEXT: movl $-1, %eax -; AVX-NEXT: cmovnel %ecx, %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; AVX-NEXT: sete %al +; AVX-NEXT: negl %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -319,22 +321,22 @@ ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovmskpd %ymm0, %eax -; AVX1-NEXT: xorl %ecx, %ecx -; AVX1-NEXT: cmpl $15, %eax -; AVX1-NEXT: movq $-1, %rax -; AVX1-NEXT: cmovneq %rcx, %rax +; AVX1-NEXT: vmovmskpd %ymm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl $15, %ecx +; AVX1-NEXT: sete %al +; AVX1-NEXT: negq %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v4i64_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %eax -; AVX2-NEXT: xorl %ecx, %ecx -; AVX2-NEXT: cmpl $15, %eax -; AVX2-NEXT: movq $-1, %rax -; AVX2-NEXT: cmovneq %rcx, %rax +; AVX2-NEXT: vmovmskpd %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl $15, %ecx +; AVX2-NEXT: sete %al +; AVX2-NEXT: negq %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -367,8 +369,9 @@ ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $15, %eax -; SSE-NEXT: movq $-1, %rax -; SSE-NEXT: cmovneq %rcx, %rax +; SSE-NEXT: sete %cl +; SSE-NEXT: negl %ecx +; SSE-NEXT: movslq %ecx, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v4i64_legal_sext: @@ -381,8 +384,9 @@ ; AVX1-NEXT: vmovmskps %xmm0, %eax ; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: cmpl $15, %eax -; AVX1-NEXT: movq $-1, %rax -; AVX1-NEXT: cmovneq %rcx, %rax +; AVX1-NEXT: sete %cl +; AVX1-NEXT: negl %ecx +; AVX1-NEXT: movslq %ecx, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -394,8 +398,9 @@ ; AVX2-NEXT: vmovmskps %xmm0, %eax ; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: cmpl $15, %eax -; AVX2-NEXT: movq $-1, %rax -; AVX2-NEXT: cmovneq %rcx, %rax +; AVX2-NEXT: sete %cl +; AVX2-NEXT: negl %ecx +; AVX2-NEXT: movslq %ecx, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -427,21 +432,21 @@ ; SSE-LABEL: test_v4i32_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $15, %eax -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: movmskps %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $15, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: negl %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: xorl %ecx, %ecx -; AVX-NEXT: cmpl $15, %eax -; AVX-NEXT: movl $-1, %eax -; AVX-NEXT: cmovnel %ecx, %eax +; AVX-NEXT: vmovmskps %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl $15, %ecx 
+; AVX-NEXT: sete %al +; AVX-NEXT: negl %eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4i32_sext: @@ -469,11 +474,11 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $15, %eax -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: movmskps %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $15, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: negl %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v8i32_sext: @@ -483,22 +488,22 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovmskps %ymm0, %eax -; AVX1-NEXT: xorl %ecx, %ecx -; AVX1-NEXT: cmpl $255, %eax -; AVX1-NEXT: movl $-1, %eax -; AVX1-NEXT: cmovnel %ecx, %eax +; AVX1-NEXT: vmovmskps %ymm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl $255, %ecx +; AVX1-NEXT: sete %al +; AVX1-NEXT: negl %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8i32_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: xorl %ecx, %ecx -; AVX2-NEXT: cmpl $255, %eax -; AVX2-NEXT: movl $-1, %eax -; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: vmovmskps %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl $255, %ecx +; AVX2-NEXT: sete %al +; AVX2-NEXT: negl %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -532,11 +537,11 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: sete %al +; SSE-NEXT: negl %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v8i32_legal_sext: @@ -546,11 +551,11 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: xorl %ecx, %ecx -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX1-NEXT: movl $-1, %eax -; AVX1-NEXT: cmovnel %ecx, %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; AVX1-NEXT: sete %al +; AVX1-NEXT: negl %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -559,11 +564,11 @@ ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: xorl %ecx, %ecx -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX2-NEXT: movl $-1, %eax -; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: vpmovmskb %xmm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; AVX2-NEXT: sete %al +; AVX2-NEXT: negl %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -598,22 +603,22 @@ ; SSE-LABEL: test_v8i16_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: sete %al +; SSE-NEXT: negl %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; ; AVX-LABEL: 
test_v8i16_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: xorl %ecx, %ecx -; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX-NEXT: movl $-1, %eax -; AVX-NEXT: cmovnel %ecx, %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; AVX-NEXT: sete %al +; AVX-NEXT: negl %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; @@ -647,11 +652,11 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: sete %al +; SSE-NEXT: negl %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; @@ -679,7 +684,8 @@ ; AVX2-NEXT: vpmovmskb %ymm0, %ecx ; AVX2-NEXT: xorl %eax, %eax ; AVX2-NEXT: cmpl $-1, %ecx -; AVX2-NEXT: cmovel %ecx, %eax +; AVX2-NEXT: sete %al +; AVX2-NEXT: negl %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -720,10 +726,10 @@ ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax +; SSE-NEXT: sete %al +; SSE-NEXT: negb %al +; SSE-NEXT: movsbl %al, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq ; @@ -735,10 +741,10 @@ ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX1-NEXT: movl $-1, %eax -; AVX1-NEXT: cmovnel %ecx, %eax +; AVX1-NEXT: sete %al +; AVX1-NEXT: negb %al +; AVX1-NEXT: movsbl %al, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -749,10 +755,10 @@ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX2-NEXT: movl $-1, %eax -; AVX2-NEXT: cmovnel %ecx, %eax +; AVX2-NEXT: sete %al +; AVX2-NEXT: negb %al +; AVX2-NEXT: movsbl %al, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -794,22 +800,18 @@ ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: sete %al +; SSE-NEXT: negb %al ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: xorl %ecx, %ecx ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX-NEXT: movl $-1, %eax -; AVX-NEXT: cmovnel %ecx, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: sete %al +; AVX-NEXT: negb %al ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16i8_sext: @@ -847,11 +849,9 @@ ; SSE-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; 
SSE-NEXT: cmovnel %ecx, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: sete %al +; SSE-NEXT: negb %al ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v32i8_sext: @@ -877,11 +877,10 @@ ; AVX2-LABEL: test_v32i8_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl $-1, %ecx -; AVX2-NEXT: cmovel %ecx, %eax -; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: cmpl $-1, %eax +; AVX2-NEXT: sete %al +; AVX2-NEXT: negb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -1272,22 +1271,18 @@ ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: sete %al +; SSE-NEXT: negb %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: xorl %ecx, %ecx ; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX-NEXT: movl $-1, %eax -; AVX-NEXT: cmovnel %ecx, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: sete %al +; AVX-NEXT: negb %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v16i8: @@ -1480,11 +1475,9 @@ ; SSE-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: sete %al +; SSE-NEXT: negb %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v16i16: @@ -1495,11 +1488,9 @@ ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX1-NEXT: movl $-1, %eax -; AVX1-NEXT: cmovnel %ecx, %eax -; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: sete %al +; AVX1-NEXT: negb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -1509,11 +1500,9 @@ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; AVX2-NEXT: movl $-1, %eax -; AVX2-NEXT: cmovnel %ecx, %eax -; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: sete %al +; AVX2-NEXT: negb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -1552,11 +1541,9 @@ ; SSE-NEXT: pcmpeqb %xmm3, %xmm1 ; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax -; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: movl $-1, %eax -; SSE-NEXT: cmovnel %ecx, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: sete %al +; SSE-NEXT: negb %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v32i8: @@ -1582,11 +1569,10 @@ ; AVX2-LABEL: bool_reduction_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl $-1, %ecx -; AVX2-NEXT: cmovel %ecx, %eax -; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: cmpl $-1, %eax +; AVX2-NEXT: sete %al +; AVX2-NEXT: negb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; Index: 
llvm/test/CodeGen/X86/vector-compare-any_of.ll =================================================================== --- llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -50,9 +50,8 @@ ; AVX-LABEL: test_v4f64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vmovmskpd %ymm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax +; AVX-NEXT: vmovmskpd %ymm0, %eax +; AVX-NEXT: negl %eax ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -84,10 +83,9 @@ ; SSE-NEXT: cmpltpd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: movmskps %xmm2, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl %eax, %ecx -; SSE-NEXT: sbbl %ecx, %ecx -; SSE-NEXT: movslq %ecx, %rax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbl %eax, %eax +; SSE-NEXT: cltq ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64_legal_sext: @@ -96,10 +94,9 @@ ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: xorl %ecx, %ecx -; AVX-NEXT: cmpl %eax, %ecx -; AVX-NEXT: sbbl %ecx, %ecx -; AVX-NEXT: movslq %ecx, %rax +; AVX-NEXT: negl %eax +; AVX-NEXT: sbbl %eax, %eax +; AVX-NEXT: cltq ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -131,18 +128,16 @@ ; SSE-LABEL: test_v4f32_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltps %xmm0, %xmm1 -; SSE-NEXT: movmskps %xmm1, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: movmskps %xmm1, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax +; AVX-NEXT: vmovmskps %xmm0, %eax +; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; @@ -171,18 +166,16 @@ ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: orps %xmm3, %xmm2 -; SSE-NEXT: movmskps %xmm2, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: movmskps %xmm2, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vmovmskps %ymm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax +; AVX-NEXT: vmovmskps %ymm0, %eax +; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -217,9 +210,8 @@ ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: pmovmskb %xmm2, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -228,9 +220,8 @@ ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax +; AVX-NEXT: vpmovmskb %xmm0, %eax +; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -312,9 +303,8 @@ ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovmskpd %ymm0, %ecx -; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl %ecx, %eax +; AVX1-NEXT: vmovmskpd %ymm0, %eax +; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -322,9 +312,8 @@ ; 
AVX2-LABEL: test_v4i64_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax +; AVX2-NEXT: vmovmskpd %ymm0, %eax +; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -356,10 +345,9 @@ ; SSE-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: xorl %ecx, %ecx -; SSE-NEXT: cmpl %eax, %ecx -; SSE-NEXT: sbbl %ecx, %ecx -; SSE-NEXT: movslq %ecx, %rax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbl %eax, %eax +; SSE-NEXT: cltq ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v4i64_legal_sext: @@ -370,10 +358,9 @@ ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: xorl %ecx, %ecx -; AVX1-NEXT: cmpl %eax, %ecx -; AVX1-NEXT: sbbl %ecx, %ecx -; AVX1-NEXT: movslq %ecx, %rax +; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbl %eax, %eax +; AVX1-NEXT: cltq ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -383,10 +370,9 @@ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovmskps %xmm0, %eax -; AVX2-NEXT: xorl %ecx, %ecx -; AVX2-NEXT: cmpl %eax, %ecx -; AVX2-NEXT: sbbl %ecx, %ecx -; AVX2-NEXT: movslq %ecx, %rax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: sbbl %eax, %eax +; AVX2-NEXT: cltq ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -418,18 +404,16 @@ ; SSE-LABEL: test_v4i32_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax +; AVX-NEXT: vmovmskps %xmm0, %eax +; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; @@ -458,9 +442,8 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -471,9 +454,8 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovmskps %ymm0, %ecx -; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl %ecx, %eax +; AVX1-NEXT: vmovmskps %ymm0, %eax +; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -481,9 +463,8 @@ ; AVX2-LABEL: test_v8i32_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax +; AVX2-NEXT: vmovmskps %ymm0, %eax +; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -518,9 +499,8 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -531,9 +511,8 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %ecx -; 
AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl %ecx, %eax +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: negl %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -543,9 +522,8 @@ ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax +; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -581,9 +559,8 @@ ; SSE-LABEL: test_v8i16_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -591,9 +568,8 @@ ; AVX-LABEL: test_v8i16_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax +; AVX-NEXT: vpmovmskb %xmm0, %eax +; AVX-NEXT: negl %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -628,9 +604,8 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -656,9 +631,8 @@ ; AVX2-LABEL: test_v16i16_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: negl %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -699,11 +673,9 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax -; SSE-NEXT: sbbl %eax, %eax -; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbw %ax, %ax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v16i16_legal_sext: @@ -713,11 +685,9 @@ ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %ecx -; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl %ecx, %eax -; AVX1-NEXT: sbbl %eax, %eax -; AVX1-NEXT: # kill: def $ax killed $ax killed $eax +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbw %ax, %ax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -726,11 +696,9 @@ ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax -; AVX2-NEXT: sbbl %eax, %eax -; AVX2-NEXT: # kill: def $ax killed $ax killed $eax +; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: sbbw %ax, %ax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -770,21 +738,17 @@ ; SSE-LABEL: test_v16i8_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax -; SSE-NEXT: 
sbbl %eax, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbb %al, %al ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax -; AVX-NEXT: sbbl %eax, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: vpmovmskb %xmm0, %eax +; AVX-NEXT: negl %eax +; AVX-NEXT: sbbb %al, %al ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16i8_sext: @@ -821,11 +785,9 @@ ; SSE-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax -; SSE-NEXT: sbbl %eax, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbb %al, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v32i8_sext: @@ -851,11 +813,9 @@ ; AVX2-LABEL: test_v32i8_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax -; AVX2-NEXT: sbbl %eax, %eax -; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: sbbb %al, %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -1258,21 +1218,17 @@ ; SSE-LABEL: bool_reduction_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax -; SSE-NEXT: sbbl %eax, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbb %al, %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl %ecx, %eax -; AVX-NEXT: sbbl %eax, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: vpmovmskb %xmm0, %eax +; AVX-NEXT: negl %eax +; AVX-NEXT: sbbb %al, %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v16i8: @@ -1469,11 +1425,9 @@ ; SSE-NEXT: pcmpeqw %xmm3, %xmm1 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax -; SSE-NEXT: sbbl %eax, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbb %al, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v16i16: @@ -1483,11 +1437,9 @@ ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %ecx -; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl %ecx, %eax -; AVX1-NEXT: sbbl %eax, %eax -; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbb %al, %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -1496,11 +1448,9 @@ ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax -; AVX2-NEXT: sbbl %eax, %eax -; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: sbbb %al, %al ; AVX2-NEXT: vzeroupper ; 
AVX2-NEXT: retq ; @@ -1538,11 +1488,9 @@ ; SSE-NEXT: pcmpeqb %xmm2, %xmm0 ; SSE-NEXT: pcmpeqb %xmm3, %xmm1 ; SSE-NEXT: por %xmm0, %xmm1 -; SSE-NEXT: pmovmskb %xmm1, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl %ecx, %eax -; SSE-NEXT: sbbl %eax, %eax -; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: pmovmskb %xmm1, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: sbbb %al, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v32i8: @@ -1568,11 +1516,9 @@ ; AVX2-LABEL: bool_reduction_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %ecx -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl %ecx, %eax -; AVX2-NEXT: sbbl %eax, %eax -; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: sbbb %al, %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ;