Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -19804,22 +19804,21 @@ // (select (x == 0), 0, -1) -> neg & sbb if (isNullConstant(Y) && (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { - SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType()); - SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0); - SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), - SDValue(Neg.getNode(), 1)); - return Res; + SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + Zero = DAG.getConstant(0, DL, Op.getValueType()); + return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); } Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); Cmp = ConvertCmpIfNecessary(Cmp, DAG); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SDValue Zero = DAG.getConstant(0, DL, Op.getValueType()); SDValue Res = // Res = 0 or -1. - DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp); + DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -362,6 +362,21 @@ def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), (SETBr)>; +// Patterns to give priority when both inputs are zero so that we don't use +// an immediate for the RHS. +// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out? +def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS), + (SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit), + (EXTRACT_SUBREG (MOV32r0), sub_8bit))>; +def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS), + (SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit), + (EXTRACT_SUBREG (MOV32r0), sub_16bit))>; +def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS), + (SBB32rr (MOV32r0), (MOV32r0))>; +def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS), + (SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit), + (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>; + //===----------------------------------------------------------------------===// // String Pseudo Instructions // Index: test/CodeGen/X86/pr35972.ll =================================================================== --- test/CodeGen/X86/pr35972.ll +++ test/CodeGen/X86/pr35972.ll @@ -5,6 +5,7 @@ ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: sbbl %ecx, %ecx ; CHECK-NEXT: kmovd %ecx, %k0 Index: test/CodeGen/X86/scheduler-backtracking.ll =================================================================== --- test/CodeGen/X86/scheduler-backtracking.ll +++ test/CodeGen/X86/scheduler-backtracking.ll @@ -700,8 +700,8 @@ ; ILP-NEXT: cmpq %rdi, %rsi ; ILP-NEXT: sbbq $0, %rdx ; ILP-NEXT: movl $0, %edx -; ILP-NEXT: sbbq $0, %rdx -; ILP-NEXT: sbbq $0, %rcx +; ILP-NEXT: sbbq %rdx, %rdx +; ILP-NEXT: sbbq %rcx, %rcx ; ILP-NEXT: setae %cl ; ILP-NEXT: movzbl %cl, %ecx ; ILP-NEXT: subq %rcx, %rax @@ -716,8 +716,8 @@ ; HYBRID-NEXT: cmpq %rdi, %rsi ; HYBRID-NEXT: sbbq $0, %rcx ; HYBRID-NEXT: movl $0, %ecx -; HYBRID-NEXT: sbbq $0, %rcx -; HYBRID-NEXT: sbbq $0, %rax +; HYBRID-NEXT: sbbq %rcx, %rcx +; HYBRID-NEXT: sbbq %rax, %rax ; HYBRID-NEXT: setae %al ; HYBRID-NEXT: movzbl %al, %ecx ; HYBRID-NEXT: movl $2, %eax @@ -733,8 +733,8 @@ ; BURR-NEXT: cmpq %rdi, %rsi ; BURR-NEXT: sbbq $0, %rcx ; BURR-NEXT: movl $0, %ecx -; BURR-NEXT: sbbq $0, %rcx -; BURR-NEXT: sbbq $0, %rax +; BURR-NEXT: sbbq %rcx, %rcx +; BURR-NEXT: sbbq %rax, %rax ; BURR-NEXT: setae %al ; BURR-NEXT: movzbl %al, %ecx ; BURR-NEXT: movl $2, %eax @@ -750,8 +750,8 @@ ; SRC-NEXT: cmpq %rdi, %rsi ; SRC-NEXT: sbbq $0, %rax ; SRC-NEXT: movl $0, %eax -; SRC-NEXT: sbbq $0, %rax -; SRC-NEXT: sbbq $0, %rcx +; SRC-NEXT: sbbq %rax, %rax +; SRC-NEXT: sbbq %rcx, %rcx ; SRC-NEXT: setae %al ; SRC-NEXT: movzbl %al, %ecx ; SRC-NEXT: movl $2, %eax @@ -768,8 +768,8 @@ ; LIN-NEXT: cmpq %rdi, %rsi ; LIN-NEXT: sbbq $0, %rdx ; LIN-NEXT: movl $0, %edx -; LIN-NEXT: sbbq $0, %rdx -; LIN-NEXT: sbbq $0, %rcx +; LIN-NEXT: sbbq %rdx, %rdx +; LIN-NEXT: sbbq %rcx, %rcx ; LIN-NEXT: setae %cl ; LIN-NEXT: movzbl %cl, %ecx ; LIN-NEXT: subq %rcx, %rax Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -624,21 +624,13 @@ ;; Test integer select between values and constants. define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test9: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq %rsi, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test9: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq %rsi, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test9: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: test9: ; ATHLON: ## %bb.0: @@ -672,21 +664,13 @@ ;; Same as test9 define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test9a: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq %rsi, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test9a: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq %rsi, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test9a: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: test9a: ; ATHLON: ## %bb.0: @@ -803,6 +787,7 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK-LABEL: test11: ; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq $1, %rdi ; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: notq %rax @@ -842,6 +827,7 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK-LABEL: test11a: ; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq $1, %rdi ; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: notq %rax Index: test/CodeGen/X86/shl-crash-on-legalize.ll =================================================================== --- test/CodeGen/X86/shl-crash-on-legalize.ll +++ test/CodeGen/X86/shl-crash-on-legalize.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE ; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: cmpb $1, %dil ; CHECK-NEXT: sbbb %dl, %dl ; CHECK-NEXT: orb %dl, %cl Index: test/CodeGen/X86/vector-compare-any_of.ll =================================================================== --- test/CodeGen/X86/vector-compare-any_of.ll +++ test/CodeGen/X86/vector-compare-any_of.ll @@ -50,8 +50,9 @@ ; AVX-LABEL: test_v4f64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vmovmskpd %ymm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskpd %ymm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl %ecx, %eax ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -83,9 +84,10 @@ ; SSE-NEXT: cmpltpd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: movmskps %xmm2, %eax -; SSE-NEXT: negl %eax -; SSE-NEXT: sbbl %eax, %eax -; SSE-NEXT: cltq +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: cmpl %eax, %ecx +; SSE-NEXT: sbbl %ecx, %ecx +; SSE-NEXT: movslq %ecx, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64_legal_sext: @@ -94,9 +96,10 @@ ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: negl %eax -; AVX-NEXT: sbbl %eax, %eax -; AVX-NEXT: cltq +; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: cmpl %eax, %ecx +; AVX-NEXT: sbbl %ecx, %ecx +; AVX-NEXT: movslq %ecx, %rax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -128,16 +131,18 @@ ; SSE-LABEL: test_v4f32_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltps %xmm0, %xmm1 -; SSE-NEXT: movmskps %xmm1, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm1, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskps %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl %ecx, %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; @@ -166,16 +171,18 @@ ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: orps %xmm3, %xmm2 -; SSE-NEXT: movmskps %xmm2, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm2, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vmovmskps %ymm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskps %ymm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl %ecx, %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -210,8 +217,9 @@ ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: pmovmskb %xmm2, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -220,8 +228,9 @@ ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl %ecx, %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -303,8 +312,9 @@ ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovmskpd %ymm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vmovmskpd %ymm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl %ecx, %eax ; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -312,8 +322,9 @@ ; AVX2-LABEL: test_v4i64_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vmovmskpd %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl %ecx, %eax ; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -345,9 +356,10 @@ ; SSE-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: negl %eax -; SSE-NEXT: sbbl %eax, %eax -; SSE-NEXT: cltq +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: cmpl %eax, %ecx +; SSE-NEXT: sbbl %ecx, %ecx +; SSE-NEXT: movslq %ecx, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v4i64_legal_sext: @@ -358,9 +370,10 @@ ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: negl %eax -; AVX1-NEXT: sbbl %eax, %eax -; AVX1-NEXT: cltq +; AVX1-NEXT: xorl %ecx, %ecx +; AVX1-NEXT: cmpl %eax, %ecx +; AVX1-NEXT: sbbl %ecx, %ecx +; AVX1-NEXT: movslq %ecx, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -370,9 +383,10 @@ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovmskps %xmm0, %eax -; AVX2-NEXT: negl %eax -; AVX2-NEXT: sbbl %eax, %eax -; AVX2-NEXT: cltq +; AVX2-NEXT: xorl %ecx, %ecx +; AVX2-NEXT: cmpl %eax, %ecx +; AVX2-NEXT: sbbl %ecx, %ecx +; AVX2-NEXT: movslq %ecx, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -404,16 +418,18 @@ ; SSE-LABEL: test_v4i32_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskps %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl %ecx, %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; @@ -442,8 +458,9 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -454,8 +471,9 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovmskps %ymm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vmovmskps %ymm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl %ecx, %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -463,8 +481,9 @@ ; AVX2-LABEL: test_v8i32_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vmovmskps %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl %ecx, %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -499,8 +518,9 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -511,8 +531,9 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl %ecx, %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -522,8 +543,9 @@ ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb %xmm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl %ecx, %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -559,8 +581,9 @@ ; SSE-LABEL: test_v8i16_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -568,8 +591,9 @@ ; AVX-LABEL: test_v8i16_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl %ecx, %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -604,8 +628,9 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -632,8 +657,9 @@ ; AVX2-LABEL: test_v16i16_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl %ecx, %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -674,8 +700,9 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -687,8 +714,9 @@ ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: cmpl %ecx, %eax ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -699,8 +727,9 @@ ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb %xmm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl %ecx, %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -742,8 +771,9 @@ ; SSE-LABEL: test_v16i8_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -751,8 +781,9 @@ ; AVX-LABEL: test_v16i8_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: cmpl %ecx, %eax ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq @@ -791,8 +822,9 @@ ; SSE-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: cmpl %ecx, %eax ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -821,8 +853,9 @@ ; AVX2-LABEL: test_v32i8_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpl %ecx, %eax ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper