diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -445,6 +445,10 @@ "HasLZCNTFalseDeps", "true", "LZCNT/TZCNT have a false dependency on dest register">; +def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking", + "HasSBBDepBreaking", "true", + "SBB with same register has no source dependency">; + // On recent X86 (port bound) processors, its preferable to combine to a single shuffle // using a variable mask over multiple fixed shuffles. def TuningFastVariableCrossLaneShuffle @@ -1032,6 +1036,7 @@ Feature64Bit]; list BarcelonaTuning = [TuningFastScalarShiftMasks, TuningSlowSHLD, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; // Bobcat @@ -1053,6 +1058,7 @@ TuningFastScalarShiftMasks, TuningFastVectorShiftMasks, TuningSlowSHLD, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; // Jaguar @@ -1072,6 +1078,7 @@ TuningFastScalarShiftMasks, TuningFastVectorShiftMasks, TuningFastMOVBE, + TuningSBBDepBreaking, TuningSlowSHLD]; list BtVer2Features = !listconcat(BtVer1Features, BtVer2AdditionalFeatures); @@ -1099,6 +1106,7 @@ TuningFast11ByteNOP, TuningFastScalarShiftMasks, TuningBranchFusion, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; // PileDriver @@ -1174,6 +1182,7 @@ TuningFastScalarShiftMasks, TuningFastMOVBE, TuningSlowSHLD, + TuningSBBDepBreaking, TuningInsertVZEROUPPER]; list ZN2AdditionalFeatures = [FeatureCLWB, FeatureRDPID, @@ -1445,7 +1454,7 @@ def : Proc; + TuningSBBDepBreaking, TuningInsertVZEROUPPER]>; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { @@ -1453,7 +1462,7 @@ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV, Feature64Bit], [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16, - TuningInsertVZEROUPPER]>; + TuningSBBDepBreaking, TuningInsertVZEROUPPER]>; } foreach P = ["amdfam10", "barcelona"] in { diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- 
a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -464,8 +464,13 @@ } // Copy flags to the EFLAGS register and glue it to next node. - SDValue EFLAGS = CurDAG->getCopyToReg( - CurDAG->getEntryNode(), dl, X86::EFLAGS, N->getOperand(2), SDValue()); + unsigned Opcode = N->getOpcode(); + assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) && + "Unexpected opcode for SBB materialization"); + unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1; + SDValue EFLAGS = + CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, + N->getOperand(FlagOpIndex), SDValue()); // Create a 64-bit instruction if the result is 64-bits otherwise use the // 32-bit version. @@ -5801,21 +5806,28 @@ break; case X86ISD::SETCC_CARRY: { - // We have to do this manually because tblgen will put the eflags copy in - // the wrong place if we use an extract_subreg in the pattern. MVT VT = Node->getSimpleValueType(0); + SDValue Result; + if (Subtarget->hasSBBDepBreaking()) { + // We have to do this manually because tblgen will put the eflags copy in + // the wrong place if we use an extract_subreg in the pattern. + // Copy flags to the EFLAGS register and glue it to next node. + SDValue EFLAGS = + CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, + Node->getOperand(1), SDValue()); - // Copy flags to the EFLAGS register and glue it to next node. - SDValue EFLAGS = - CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, - Node->getOperand(1), SDValue()); - - // Create a 64-bit instruction if the result is 64-bits otherwise use the - // 32-bit version. - unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; - MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; - SDValue Result = SDValue( - CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0); + // Create a 64-bit instruction if the result is 64-bits otherwise use the + // 32-bit version. + unsigned Opc = VT == MVT::i64 ? 
X86::SETB_C64r : X86::SETB_C32r; + MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; + Result = SDValue( + CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), + 0); + } else { + // The target does not recognize sbb with the same reg operand as a + // no-source idiom, so we explicitly zero the input values. + Result = getSBBZero(Node); + } // For less than 32-bits we need to extract from the 32-bit node. if (VT == MVT::i8 || VT == MVT::i16) { diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -246,6 +246,10 @@ /// True if LZCNT/TZCNT instructions have a false dependency on the destination register. bool HasLZCNTFalseDeps = false; + /// True if an SBB instruction with same source register is recognized as + /// having no dependency on that register. + bool HasSBBDepBreaking = false; + /// True if its preferable to combine to a single cross-lane shuffle /// using a variable mask over multiple fixed shuffles. 
bool HasFastVariableCrossLaneShuffle = false; @@ -719,6 +723,7 @@ bool useLeaForSP() const { return UseLeaForSP; } bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; } bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; } + bool hasSBBDepBreaking() const { return HasSBBDepBreaking; } bool hasFastVariableCrossLaneShuffle() const { return HasFastVariableCrossLaneShuffle; } diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll --- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll +++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll @@ -139,8 +139,9 @@ ; CHECK-LABEL: movmskps_concat_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vmovmskps %xmm0, %eax -; CHECK-NEXT: negl %eax +; CHECK-NEXT: vmovmskps %xmm0, %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: negl %ecx ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> @@ -153,9 +154,10 @@ define i32 @movmskps_demanded_concat_v4f32(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: movmskps_demanded_concat_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovmskps %xmm0, %eax -; CHECK-NEXT: andl $3, %eax -; CHECK-NEXT: negl %eax +; CHECK-NEXT: vmovmskps %xmm0, %ecx +; CHECK-NEXT: andl $3, %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: negl %ecx ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll --- a/llvm/test/CodeGen/X86/copy-eflags.ll +++ b/llvm/test/CodeGen/X86/copy-eflags.ll @@ -293,6 +293,7 @@ define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind { ; X32-LABEL: PR37431: ; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi @@ -302,10 +303,11 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), 
%edi ; X32-NEXT: movl (%edi), %edi -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: sarl $31, %ebx +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: sarl $31, %ebp +; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: cmpl %edi, {{[0-9]+}}(%esp) -; X32-NEXT: sbbl %ebx, %esi +; X32-NEXT: sbbl %ebp, %esi ; X32-NEXT: sbbl %ebx, %ebx ; X32-NEXT: movb %bl, (%edx) ; X32-NEXT: cltd @@ -314,6 +316,7 @@ ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp ; X32-NEXT: retl ; ; X64-LABEL: PR37431: @@ -321,6 +324,7 @@ ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movslq (%rdi), %rdx +; X64-NEXT: xorl %edi, %edi ; X64-NEXT: cmpq %rdx, %r8 ; X64-NEXT: sbbl %edi, %edi ; X64-NEXT: movb %dil, (%rsi) diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll --- a/llvm/test/CodeGen/X86/jump_sign.ll +++ b/llvm/test/CodeGen/X86/jump_sign.ll @@ -310,6 +310,7 @@ ; CHECK-LABEL: func_q: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: sbbl %ecx, %ecx ; CHECK-NEXT: negl %eax diff --git a/llvm/test/CodeGen/X86/machine-cse.ll b/llvm/test/CodeGen/X86/machine-cse.ll --- a/llvm/test/CodeGen/X86/machine-cse.ll +++ b/llvm/test/CodeGen/X86/machine-cse.ll @@ -112,6 +112,8 @@ ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: ja .LBB2_2 ; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: .LBB2_2: # %return ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr32588.ll b/llvm/test/CodeGen/X86/pr32588.ll --- a/llvm/test/CodeGen/X86/pr32588.ll +++ b/llvm/test/CodeGen/X86/pr32588.ll @@ -8,6 +8,7 @@ define void @fn1() { ; CHECK-LABEL: fn1: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $1, c(%rip) ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: andl $1, %eax diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll --- 
a/llvm/test/CodeGen/X86/pr35972.ll +++ b/llvm/test/CodeGen/X86/pr35972.ll @@ -5,6 +5,7 @@ ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: sbbl %ecx, %ecx ; CHECK-NEXT: kmovd %ecx, %k0 diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll --- a/llvm/test/CodeGen/X86/sbb-false-dep.ll +++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM %struct.y_s = type { i64*, i64* } @@ -24,13 +25,15 @@ ; CHECK-NEXT: callq foo1@PLT ; CHECK-NEXT: movq 8(%rbx), %rax ; CHECK-NEXT: movq (%rax), %rdx +; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: movl %r13d, %ecx ; CHECK-NEXT: negl %ecx -; CHECK-NEXT: sbbq %rbp, %rbp -; CHECK-NEXT: orq %rdx, %rbp -; CHECK-NEXT: cmpl $1, %r13d +; CHECK-NEXT: movl $0, %eax ; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: orq %rdx, %rax +; CHECK-NEXT: cmpl $1, %r13d +; CHECK-NEXT: sbbq %rbp, %rbp +; CHECK-NEXT: orq %rdx, %rbp ; CHECK-NEXT: subq $8, %rsp ; CHECK-NEXT: movq %r12, %rdi ; CHECK-NEXT: movl %r15d, %esi @@ -38,8 +41,8 @@ ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %r8d, %r8d ; CHECK-NEXT: xorl %r9d, %r9d -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: callq foo2@PLT ; CHECK-NEXT: addq $40, %rsp @@ -50,6 +53,53 @@ ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq +; +; IDIOM-LABEL: mallocbench_gs: +; IDIOM: # %bb.0: +; IDIOM-NEXT: pushq %rbp +; IDIOM-NEXT: pushq %r15 +; IDIOM-NEXT: pushq %r14 +; IDIOM-NEXT: pushq %r13 +; IDIOM-NEXT: pushq %r12 +; IDIOM-NEXT: pushq %rbx +; IDIOM-NEXT: 
pushq %rax +; IDIOM-NEXT: movl %r8d, %r13d +; IDIOM-NEXT: movl %ecx, %r14d +; IDIOM-NEXT: movl %edx, %r15d +; IDIOM-NEXT: movq %rsi, %rbx +; IDIOM-NEXT: movq %rdi, %r12 +; IDIOM-NEXT: movq (%rsi), %rdi +; IDIOM-NEXT: movq 8(%rsi), %rsi +; IDIOM-NEXT: movq %rbx, %rdx +; IDIOM-NEXT: callq foo1@PLT +; IDIOM-NEXT: movq 8(%rbx), %rax +; IDIOM-NEXT: movq (%rax), %rdx +; IDIOM-NEXT: movl %r13d, %ecx +; IDIOM-NEXT: negl %ecx +; IDIOM-NEXT: sbbq %rbp, %rbp +; IDIOM-NEXT: orq %rdx, %rbp +; IDIOM-NEXT: cmpl $1, %r13d +; IDIOM-NEXT: sbbq %rax, %rax +; IDIOM-NEXT: orq %rdx, %rax +; IDIOM-NEXT: subq $8, %rsp +; IDIOM-NEXT: movq %r12, %rdi +; IDIOM-NEXT: movl %r15d, %esi +; IDIOM-NEXT: movl %r14d, %edx +; IDIOM-NEXT: xorl %ecx, %ecx +; IDIOM-NEXT: xorl %r8d, %r8d +; IDIOM-NEXT: xorl %r9d, %r9d +; IDIOM-NEXT: pushq %rax +; IDIOM-NEXT: pushq %rbp +; IDIOM-NEXT: pushq %rbx +; IDIOM-NEXT: callq foo2@PLT +; IDIOM-NEXT: addq $40, %rsp +; IDIOM-NEXT: popq %rbx +; IDIOM-NEXT: popq %r12 +; IDIOM-NEXT: popq %r13 +; IDIOM-NEXT: popq %r14 +; IDIOM-NEXT: popq %r15 +; IDIOM-NEXT: popq %rbp +; IDIOM-NEXT: retq %6 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 0 %7 = load i64*, i64** %6, align 8 %8 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 1 diff --git a/llvm/test/CodeGen/X86/sbb-zero-idiom.ll b/llvm/test/CodeGen/X86/sbb-zero-idiom.ll --- a/llvm/test/CodeGen/X86/sbb-zero-idiom.ll +++ b/llvm/test/CodeGen/X86/sbb-zero-idiom.ll @@ -1,18 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -mtriple=x86_64-- -mcpu=k8 | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s 
-mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK + +; Check the attribute. + +; RUN: llc < %s -mtriple=x86_64-- -mattr=-sbb-dep-breaking | FileCheck %s --check-prefixes=ZERO +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM + +; And check that CPUs have included the attribute as expected. + +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=ZERO +; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=ZERO +; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=ZERO +; RUN: llc < %s -mtriple=x86_64-- -mcpu=k8 | FileCheck %s --check-prefixes=IDIOM +; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=IDIOM +; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=IDIOM +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=IDIOM define i32 @i32_select_0_or_neg1(i32 %x) { -; CHECK-LABEL: i32_select_0_or_neg1: -; CHECK: # %bb.0: -; CHECK-NEXT: negl %edi -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: retq +; ZERO-LABEL: i32_select_0_or_neg1: +; ZERO: # %bb.0: +; ZERO-NEXT: xorl %eax, %eax +; ZERO-NEXT: negl %edi +; ZERO-NEXT: sbbl %eax, %eax +; ZERO-NEXT: retq +; +; IDIOM-LABEL: i32_select_0_or_neg1: +; IDIOM: # %bb.0: +; IDIOM-NEXT: negl %edi +; IDIOM-NEXT: sbbl %eax, %eax +; IDIOM-NEXT: retq %cmp = icmp ne i32 %x, 0 %sel = select i1 %cmp, i32 -1, i32 0 ret i32 %sel diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll --- a/llvm/test/CodeGen/X86/sbb.ll +++ b/llvm/test/CodeGen/X86/sbb.ll @@ -8,6 +8,7 @@ define i8 @i8_select_0_or_neg1(i8 %x) { ; CHECK-LABEL: i8_select_0_or_neg1: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: negb %dil ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -22,6 +23,7 @@ define i16 
@i16_select_0_or_neg1_as_math(i16 %x) { ; CHECK-LABEL: i16_select_0_or_neg1_as_math: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: negw %di ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax @@ -37,6 +39,7 @@ define i32 @i32_select_0_or_neg1_commuted(i32 %x) { ; CHECK-LABEL: i32_select_0_or_neg1_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: negl %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq @@ -50,6 +53,7 @@ define i64 @i64_select_0_or_neg1_commuted_as_math(i64 %x) { ; CHECK-LABEL: i64_select_0_or_neg1_commuted_as_math: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: negq %rdi ; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: retq @@ -64,6 +68,7 @@ define i64 @i64_select_neg1_or_0(i64 %x) { ; CHECK-LABEL: i64_select_neg1_or_0: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq $1, %rdi ; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: retq @@ -77,6 +82,7 @@ define i32 @i32_select_neg1_or_0_as_math(i32 %x) { ; CHECK-LABEL: i32_select_neg1_or_0_as_math: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq @@ -91,6 +97,7 @@ define i16 @i16_select_neg1_or_0_commuted(i16 %x) { ; CHECK-LABEL: i16_select_neg1_or_0_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpw $1, %di ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax @@ -105,6 +112,7 @@ define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) { ; CHECK-LABEL: i8_select_neg1_or_0_commuted_as_math: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpb $1, %dil ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -120,6 +128,7 @@ define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ult_select_neg1_or_0: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; 
CHECK-NEXT: retq @@ -134,6 +143,7 @@ define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ugt_select_neg1_or_0: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq @@ -148,6 +158,7 @@ define i32 @uge_select_0_or_neg1(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: uge_select_0_or_neg1: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq @@ -163,6 +174,7 @@ define i32 @ule_select_0_or_neg1(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ule_select_0_or_neg1: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq @@ -178,6 +190,7 @@ define i32 @uge_select_0_or_neg1_sub(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: uge_select_0_or_neg1_sub: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq @@ -193,6 +206,7 @@ define i64 @ugt_select_neg1_or_0_sub(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ugt_select_neg1_or_0_sub: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq %rdi, %rsi ; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: retq @@ -208,6 +222,7 @@ define i16 @ult_select_neg1_or_0_sub(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: ult_select_neg1_or_0_sub: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpw %di, %si ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax @@ -226,6 +241,7 @@ define void @PR33560(i8 %x, i64 %y) { ; CHECK-LABEL: PR33560: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: negb %dil ; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: cmpq %rsi, %rax diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -1219,6 +1219,7 @@ ; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: andl %eax, %ebx ; X86-NEXT: negl %eax +; X86-NEXT: movl $0, %ecx ; X86-NEXT: sbbl %ecx, %ecx ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -1242,6 +1243,7 @@ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: andl %eax, %edi ; X86-NEXT: negl %eax +; X86-NEXT: movl $0, %eax ; X86-NEXT: sbbl %eax, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -1268,6 +1270,7 @@ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: andl %eax, %edx ; X86-NEXT: negl %eax +; X86-NEXT: movl $0, %eax ; X86-NEXT: sbbl %eax, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -1291,6 +1294,7 @@ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: andl %eax, %edi ; X86-NEXT: negl %eax +; X86-NEXT: movl $0, %eax ; X86-NEXT: sbbl %eax, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -629,21 +629,13 @@ ;; Test integer select between values and constants. 
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test9: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq %rsi, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test9: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq %rsi, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test9: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: test9: ; ATHLON: ## %bb.0: @@ -677,21 +669,13 @@ ;; Same as test9 define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test9a: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq %rsi, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test9a: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq %rsi, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test9a: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: test9a: ; ATHLON: ## %bb.0: @@ -723,21 +707,13 @@ } define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test9b: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq %rsi, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test9b: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq %rsi, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test9b: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: test9b: ; 
ATHLON: ## %bb.0: @@ -770,21 +746,13 @@ ;; Select between -1 and 1. define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test10: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq $1, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test10: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq $1, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test10: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq $1, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: test10: ; ATHLON: ## %bb.0: @@ -814,21 +782,13 @@ } define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test11: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: negq %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq %rsi, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test11: -; ATOM: ## %bb.0: -; ATOM-NEXT: negq %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq %rsi, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test11: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: negq %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: test11: ; ATHLON: ## %bb.0: @@ -861,21 +821,13 @@ } define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test11a: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: negq %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq %rsi, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: test11a: -; ATOM: ## %bb.0: -; ATOM-NEXT: negq %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq %rsi, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: test11a: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: negq %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: 
retq ; ; ATHLON-LABEL: test11a: ; ATHLON: ## %bb.0: @@ -907,21 +859,13 @@ } define i32 @eqzero_const_or_all_ones(i32 %x) { -; GENERIC-LABEL: eqzero_const_or_all_ones: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: negl %edi -; GENERIC-NEXT: sbbl %eax, %eax -; GENERIC-NEXT: orl $42, %eax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: eqzero_const_or_all_ones: -; ATOM: ## %bb.0: -; ATOM-NEXT: negl %edi -; ATOM-NEXT: sbbl %eax, %eax -; ATOM-NEXT: orl $42, %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: eqzero_const_or_all_ones: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: negl %edi +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: orl $42, %eax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: eqzero_const_or_all_ones: ; ATHLON: ## %bb.0: @@ -933,9 +877,11 @@ ; ; MCU-LABEL: eqzero_const_or_all_ones: ; MCU: # %bb.0: +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: negl %eax -; MCU-NEXT: sbbl %eax, %eax -; MCU-NEXT: orl $42, %eax +; MCU-NEXT: sbbl %ecx, %ecx +; MCU-NEXT: orl $42, %ecx +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %z = icmp eq i32 %x, 0 %r = select i1 %z, i32 42, i32 -1 @@ -943,24 +889,17 @@ } define i32 @nezero_const_or_all_ones(i32 %x) { -; GENERIC-LABEL: nezero_const_or_all_ones: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpl $1, %edi -; GENERIC-NEXT: sbbl %eax, %eax -; GENERIC-NEXT: orl $42, %eax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: nezero_const_or_all_ones: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpl $1, %edi -; ATOM-NEXT: sbbl %eax, %eax -; ATOM-NEXT: orl $42, %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: nezero_const_or_all_ones: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: orl $42, %eax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: nezero_const_or_all_ones: ; ATHLON: ## %bb.0: +; ATHLON-NEXT: xorl %eax, %eax ; ATHLON-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; ATHLON-NEXT: sbbl %eax, %eax ; ATHLON-NEXT: orl $42, %eax @@ -968,9 +907,11 @@ ; ; 
MCU-LABEL: nezero_const_or_all_ones: ; MCU: # %bb.0: +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: cmpl $1, %eax -; MCU-NEXT: sbbl %eax, %eax -; MCU-NEXT: orl $42, %eax +; MCU-NEXT: sbbl %ecx, %ecx +; MCU-NEXT: orl $42, %ecx +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %z = icmp ne i32 %x, 0 %r = select i1 %z, i32 42, i32 -1 @@ -978,21 +919,13 @@ } define i64 @eqzero_all_ones_or_const(i64 %x) { -; GENERIC-LABEL: eqzero_all_ones_or_const: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq $42, %rax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: eqzero_all_ones_or_const: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq $42, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: eqzero_all_ones_or_const: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: orq $42, %rax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: eqzero_all_ones_or_const: ; ATHLON: ## %bb.0: @@ -1022,23 +955,14 @@ } define i8 @nezero_all_ones_or_const(i8 %x) { -; GENERIC-LABEL: nezero_all_ones_or_const: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: negb %dil -; GENERIC-NEXT: sbbl %eax, %eax -; GENERIC-NEXT: orb $42, %al -; GENERIC-NEXT: ## kill: def $al killed $al killed $eax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: nezero_all_ones_or_const: -; ATOM: ## %bb.0: -; ATOM-NEXT: negb %dil -; ATOM-NEXT: sbbl %eax, %eax -; ATOM-NEXT: orb $42, %al -; ATOM-NEXT: ## kill: def $al killed $al killed $eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: nezero_all_ones_or_const: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: negb %dil +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: orb $42, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: nezero_all_ones_or_const: ; ATHLON: ## %bb.0: @@ -1051,10 +975,11 @@ ; ; MCU-LABEL: nezero_all_ones_or_const: ; MCU: # 
%bb.0: +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: negb %al -; MCU-NEXT: sbbl %eax, %eax -; MCU-NEXT: orb $42, %al -; MCU-NEXT: # kill: def $al killed $al killed $eax +; MCU-NEXT: sbbl %ecx, %ecx +; MCU-NEXT: orb $42, %cl +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %z = icmp ne i8 %x, 0 %r = select i1 %z, i8 -1, i8 42 @@ -1062,21 +987,13 @@ } define i32 @PR53006(i32 %x) { -; GENERIC-LABEL: PR53006: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: negl %edi -; GENERIC-NEXT: sbbl %eax, %eax -; GENERIC-NEXT: orl $1, %eax -; GENERIC-NEXT: retq -; -; ATOM-LABEL: PR53006: -; ATOM: ## %bb.0: -; ATOM-NEXT: negl %edi -; ATOM-NEXT: sbbl %eax, %eax -; ATOM-NEXT: orl $1, %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq +; CHECK-LABEL: PR53006: +; CHECK: ## %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: negl %edi +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: orl $1, %eax +; CHECK-NEXT: retq ; ; ATHLON-LABEL: PR53006: ; ATHLON: ## %bb.0: @@ -1088,9 +1005,11 @@ ; ; MCU-LABEL: PR53006: ; MCU: # %bb.0: +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: negl %eax -; MCU-NEXT: sbbl %eax, %eax -; MCU-NEXT: orl $1, %eax +; MCU-NEXT: sbbl %ecx, %ecx +; MCU-NEXT: orl $1, %ecx +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %z = icmp eq i32 %x, 0 %r = select i1 %z, i32 1, i32 -1 @@ -1100,31 +1019,34 @@ define i32 @test13(i32 %a, i32 %b) nounwind { ; GENERIC-LABEL: test13: ; GENERIC: ## %bb.0: +; GENERIC-NEXT: xorl %eax, %eax ; GENERIC-NEXT: cmpl %esi, %edi ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test13: ; ATOM: ## %bb.0: +; ATOM-NEXT: xorl %eax, %eax ; ATOM-NEXT: cmpl %esi, %edi ; ATOM-NEXT: sbbl %eax, %eax ; ATOM-NEXT: nop ; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test13: ; ATHLON: ## %bb.0: -; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax -; ATHLON-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx +; ATHLON-NEXT: xorl %eax, %eax +; ATHLON-NEXT: cmpl {{[0-9]+}}(%esp), %ecx ; ATHLON-NEXT: sbbl 
%eax, %eax ; ATHLON-NEXT: retl ; ; MCU-LABEL: test13: ; MCU: # %bb.0: +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: cmpl %edx, %eax -; MCU-NEXT: sbbl %eax, %eax +; MCU-NEXT: sbbl %ecx, %ecx +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %c = icmp ult i32 %a, %b %d = sext i1 %c to i32 @@ -1172,18 +1094,18 @@ define i32 @test15(i32 %x) nounwind { ; GENERIC-LABEL: test15: ; GENERIC: ## %bb.0: ## %entry +; GENERIC-NEXT: xorl %eax, %eax ; GENERIC-NEXT: negl %edi ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test15: ; ATOM: ## %bb.0: ## %entry +; ATOM-NEXT: xorl %eax, %eax ; ATOM-NEXT: negl %edi ; ATOM-NEXT: sbbl %eax, %eax ; ATOM-NEXT: nop ; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test15: @@ -1195,8 +1117,10 @@ ; ; MCU-LABEL: test15: ; MCU: # %bb.0: # %entry +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: negl %eax -; MCU-NEXT: sbbl %eax, %eax +; MCU-NEXT: sbbl %ecx, %ecx +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl entry: %cmp = icmp ne i32 %x, 0 @@ -1207,18 +1131,18 @@ define i64 @test16(i64 %x) nounwind uwtable readnone ssp { ; GENERIC-LABEL: test16: ; GENERIC: ## %bb.0: ## %entry +; GENERIC-NEXT: xorl %eax, %eax ; GENERIC-NEXT: negq %rdi ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test16: ; ATOM: ## %bb.0: ## %entry +; ATOM-NEXT: xorl %eax, %eax ; ATOM-NEXT: negq %rdi ; ATOM-NEXT: sbbq %rax, %rax ; ATOM-NEXT: nop ; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test16: @@ -1249,6 +1173,7 @@ define i16 @test17(i16 %x) nounwind { ; GENERIC-LABEL: test17: ; GENERIC: ## %bb.0: ## %entry +; GENERIC-NEXT: xorl %eax, %eax ; GENERIC-NEXT: negw %di ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: ## kill: def $ax killed $ax killed $eax @@ -1256,13 +1181,12 @@ ; ; ATOM-LABEL: test17: ; ATOM: ## %bb.0: ## %entry +; ATOM-NEXT: xorl %eax, %eax ; ATOM-NEXT: negw %di ; ATOM-NEXT: sbbl %eax, %eax ; ATOM-NEXT: ## kill: def $ax killed $ax killed $eax ; ATOM-NEXT: 
nop ; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test17: @@ -1275,9 +1199,10 @@ ; ; MCU-LABEL: test17: ; MCU: # %bb.0: # %entry +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: negw %ax -; MCU-NEXT: sbbl %eax, %eax -; MCU-NEXT: # kill: def $ax killed $ax killed $eax +; MCU-NEXT: sbbl %ecx, %ecx +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl entry: %cmp = icmp ne i16 %x, 0 diff --git a/llvm/test/CodeGen/X86/sext-i1.ll b/llvm/test/CodeGen/X86/sext-i1.ll --- a/llvm/test/CodeGen/X86/sext-i1.ll +++ b/llvm/test/CodeGen/X86/sext-i1.ll @@ -8,12 +8,14 @@ define i32 @t1(i32 %x) nounwind readnone ssp { ; X32-LABEL: t1: ; X32: # %bb.0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; X32-NEXT: sbbl %eax, %eax ; X32-NEXT: retl ; ; X64-LABEL: t1: ; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl $1, %edi ; X64-NEXT: sbbl %eax, %eax ; X64-NEXT: retq @@ -25,12 +27,14 @@ define i32 @t2(i32 %x) nounwind readnone ssp { ; X32-LABEL: t2: ; X32: # %bb.0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; X32-NEXT: sbbl %eax, %eax ; X32-NEXT: retl ; ; X64-LABEL: t2: ; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl $1, %edi ; X64-NEXT: sbbl %eax, %eax ; X64-NEXT: retq @@ -43,6 +47,7 @@ ; X32-LABEL: t3: ; X32: # %bb.0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; X32-NEXT: sbbl %ecx, %ecx ; X32-NEXT: cmpl %ecx, {{[0-9]+}}(%esp) @@ -87,6 +92,7 @@ ; ; X64-LABEL: t4: ; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq $1, %rdi ; X64-NEXT: sbbl %eax, %eax ; X64-NEXT: retq @@ -98,6 +104,7 @@ define i64 @t5(i32 %x) nounwind readnone ssp { ; X32-LABEL: t5: ; X32: # %bb.0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; X32-NEXT: sbbl %eax, %eax ; X32-NEXT: movl %eax, %edx @@ -105,6 +112,7 @@ ; ; X64-LABEL: t5: ; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl $1, %edi ; X64-NEXT: sbbq 
%rax, %rax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll b/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll --- a/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll +++ b/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll @@ -14,6 +14,7 @@ ; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE ; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: cmpb $1, %dil ; CHECK-NEXT: sbbl %ecx, %ecx ; CHECK-NEXT: orb %sil, %cl diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll --- a/llvm/test/CodeGen/X86/umul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll @@ -443,29 +443,30 @@ ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %ebp ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: mull %edi -; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ebx +; X86-NEXT: addl %edx, %edi ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi -; X86-NEXT: addl %esi, %eax +; X86-NEXT: mull %ebx +; X86-NEXT: addl %edi, %eax ; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: adcl $0, %esi ; X86-NEXT: addl %ebp, %edx -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: negl %ebx +; X86-NEXT: adcl $0, %esi +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: negl %esi ; X86-NEXT: sbbl %ecx, %ecx ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: orl %ecx, %edx @@ -521,11 +522,12 @@ ; X86-NEXT: shrdl $31, %edx, %eax ; X86-NEXT: movl %edx, %esi ; X86-NEXT: shrl 
$31, %esi +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: negl %esi -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: orl %esi, %eax +; X86-NEXT: sbbl %edi, %edi +; X86-NEXT: orl %edi, %eax ; X86-NEXT: shrdl $31, %ecx, %edx -; X86-NEXT: orl %esi, %edx +; X86-NEXT: orl %edi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll --- a/llvm/test/CodeGen/X86/vec_uaddo.ll +++ b/llvm/test/CodeGen/X86/vec_uaddo.ll @@ -26,6 +26,7 @@ define <1 x i32> @uaddo_v1i32(<1 x i32> %a0, <1 x i32> %a1, <1 x i32>* %p2) nounwind { ; CHECK-LABEL: uaddo_v1i32: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: addl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: movl %edi, (%rdx) @@ -1139,14 +1140,16 @@ ; SSE2-LABEL: uaddo_v2i128: ; SSE2: # %bb.0: ; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE2-NEXT: xorl %r11d, %r11d ; SSE2-NEXT: addq {{[0-9]+}}(%rsp), %rdx ; SSE2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; SSE2-NEXT: movl $0, %eax ; SSE2-NEXT: sbbl %eax, %eax ; SSE2-NEXT: addq %r8, %rdi ; SSE2-NEXT: adcq %r9, %rsi ; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: sbbl %eax, %eax -; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: sbbl %r11d, %r11d +; SSE2-NEXT: movd %r11d, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movq %rdx, 16(%r10) ; SSE2-NEXT: movq %rdi, (%r10) @@ -1157,14 +1160,16 @@ ; SSSE3-LABEL: uaddo_v2i128: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSSE3-NEXT: xorl %r11d, %r11d ; SSSE3-NEXT: addq {{[0-9]+}}(%rsp), %rdx ; SSSE3-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; SSSE3-NEXT: movl $0, %eax ; SSSE3-NEXT: sbbl %eax, %eax ; SSSE3-NEXT: addq %r8, %rdi ; SSSE3-NEXT: adcq %r9, %rsi ; SSSE3-NEXT: movd %eax, %xmm1 -; SSSE3-NEXT: sbbl %eax, %eax -; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: sbbl %r11d, %r11d +; SSSE3-NEXT: movd %r11d, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movq 
%rdx, 16(%r10) ; SSSE3-NEXT: movq %rdi, (%r10) @@ -1175,14 +1180,16 @@ ; SSE41-LABEL: uaddo_v2i128: ; SSE41: # %bb.0: ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE41-NEXT: xorl %r11d, %r11d ; SSE41-NEXT: addq {{[0-9]+}}(%rsp), %rdx ; SSE41-NEXT: adcq {{[0-9]+}}(%rsp), %rcx -; SSE41-NEXT: sbbl %r11d, %r11d +; SSE41-NEXT: movl $0, %eax +; SSE41-NEXT: sbbl %eax, %eax ; SSE41-NEXT: addq %r8, %rdi ; SSE41-NEXT: adcq %r9, %rsi -; SSE41-NEXT: sbbl %eax, %eax -; SSE41-NEXT: movd %eax, %xmm0 -; SSE41-NEXT: pinsrd $1, %r11d, %xmm0 +; SSE41-NEXT: sbbl %r11d, %r11d +; SSE41-NEXT: movd %r11d, %xmm0 +; SSE41-NEXT: pinsrd $1, %eax, %xmm0 ; SSE41-NEXT: movq %rdx, 16(%r10) ; SSE41-NEXT: movq %rdi, (%r10) ; SSE41-NEXT: movq %rcx, 24(%r10) @@ -1192,14 +1199,16 @@ ; AVX-LABEL: uaddo_v2i128: ; AVX: # %bb.0: ; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX-NEXT: xorl %r11d, %r11d ; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rcx -; AVX-NEXT: sbbl %r11d, %r11d +; AVX-NEXT: movl $0, %eax +; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: addq %r8, %rdi ; AVX-NEXT: adcq %r9, %rsi -; AVX-NEXT: sbbl %eax, %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: vpinsrd $1, %r11d, %xmm0, %xmm0 +; AVX-NEXT: sbbl %r11d, %r11d +; AVX-NEXT: vmovd %r11d, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: movq %rdx, 16(%r10) ; AVX-NEXT: movq %rdi, (%r10) ; AVX-NEXT: movq %rcx, 24(%r10) diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll --- a/llvm/test/CodeGen/X86/vec_usubo.ll +++ b/llvm/test/CodeGen/X86/vec_usubo.ll @@ -26,6 +26,7 @@ define <1 x i32> @usubo_v1i32(<1 x i32> %a0, <1 x i32> %a1, <1 x i32>* %p2) nounwind { ; CHECK-LABEL: usubo_v1i32: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: subl %esi, %edi ; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: movl %edi, (%rdx) @@ -1186,14 +1187,16 @@ ; SSE2-LABEL: usubo_v2i128: ; SSE2: # %bb.0: ; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE2-NEXT: xorl %r11d, %r11d ; SSE2-NEXT: subq 
{{[0-9]+}}(%rsp), %rdx ; SSE2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; SSE2-NEXT: movl $0, %eax ; SSE2-NEXT: sbbl %eax, %eax ; SSE2-NEXT: subq %r8, %rdi ; SSE2-NEXT: sbbq %r9, %rsi ; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: sbbl %eax, %eax -; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: sbbl %r11d, %r11d +; SSE2-NEXT: movd %r11d, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movq %rdx, 16(%r10) ; SSE2-NEXT: movq %rdi, (%r10) @@ -1204,14 +1207,16 @@ ; SSSE3-LABEL: usubo_v2i128: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSSE3-NEXT: xorl %r11d, %r11d ; SSSE3-NEXT: subq {{[0-9]+}}(%rsp), %rdx ; SSSE3-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; SSSE3-NEXT: movl $0, %eax ; SSSE3-NEXT: sbbl %eax, %eax ; SSSE3-NEXT: subq %r8, %rdi ; SSSE3-NEXT: sbbq %r9, %rsi ; SSSE3-NEXT: movd %eax, %xmm1 -; SSSE3-NEXT: sbbl %eax, %eax -; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: sbbl %r11d, %r11d +; SSSE3-NEXT: movd %r11d, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movq %rdx, 16(%r10) ; SSSE3-NEXT: movq %rdi, (%r10) @@ -1222,14 +1227,16 @@ ; SSE41-LABEL: usubo_v2i128: ; SSE41: # %bb.0: ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE41-NEXT: xorl %r11d, %r11d ; SSE41-NEXT: subq {{[0-9]+}}(%rsp), %rdx ; SSE41-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx -; SSE41-NEXT: sbbl %r11d, %r11d +; SSE41-NEXT: movl $0, %eax +; SSE41-NEXT: sbbl %eax, %eax ; SSE41-NEXT: subq %r8, %rdi ; SSE41-NEXT: sbbq %r9, %rsi -; SSE41-NEXT: sbbl %eax, %eax -; SSE41-NEXT: movd %eax, %xmm0 -; SSE41-NEXT: pinsrd $1, %r11d, %xmm0 +; SSE41-NEXT: sbbl %r11d, %r11d +; SSE41-NEXT: movd %r11d, %xmm0 +; SSE41-NEXT: pinsrd $1, %eax, %xmm0 ; SSE41-NEXT: movq %rdx, 16(%r10) ; SSE41-NEXT: movq %rdi, (%r10) ; SSE41-NEXT: movq %rcx, 24(%r10) @@ -1239,14 +1246,16 @@ ; AVX-LABEL: usubo_v2i128: ; AVX: # %bb.0: ; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVX-NEXT: xorl %r11d, %r11d ; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: sbbq 
{{[0-9]+}}(%rsp), %rcx -; AVX-NEXT: sbbl %r11d, %r11d +; AVX-NEXT: movl $0, %eax +; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: subq %r8, %rdi ; AVX-NEXT: sbbq %r9, %rsi -; AVX-NEXT: sbbl %eax, %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: vpinsrd $1, %r11d, %xmm0, %xmm0 +; AVX-NEXT: sbbl %r11d, %r11d +; AVX-NEXT: vmovd %r11d, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: movq %rdx, 16(%r10) ; AVX-NEXT: movq %rdi, (%r10) ; AVX-NEXT: movq %rcx, 24(%r10) diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -8,24 +8,27 @@ ; SSE-LABEL: test_v2f64_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltpd %xmm0, %xmm1 -; SSE-NEXT: movmskpd %xmm1, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskpd %xmm1, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskpd %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: vmovmskpd %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskpd %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: retq %c = fcmp ogt <2 x double> %a0, %a1 @@ -42,16 +45,18 @@ ; SSE-NEXT: cmpltpd %xmm1, %xmm3 ; SSE-NEXT: cmpltpd %xmm0, %xmm2 ; SSE-NEXT: orpd %xmm3, %xmm2 -; SSE-NEXT: movmskpd %xmm2, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskpd %xmm2, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX-NEXT: 
vmovmskpd %ymm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskpd %ymm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -59,8 +64,9 @@ ; AVX512-LABEL: test_v4f64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: vmovmskpd %ymm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskpd %ymm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -80,8 +86,9 @@ ; SSE-NEXT: cmpltpd %xmm1, %xmm3 ; SSE-NEXT: cmpltpd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: movmskps %xmm2, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm2, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; @@ -90,8 +97,9 @@ ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskps %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -101,8 +109,9 @@ ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: vmovmskps %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskps %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -121,24 +130,27 @@ ; SSE-LABEL: test_v4f32_sext: ; SSE: # %bb.0: ; SSE-NEXT: cmpltps %xmm0, %xmm1 -; SSE-NEXT: movmskps %xmm1, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm1, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: 
vmovmskps %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskps %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: vmovmskps %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskps %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: retq %c = fcmp ogt <4 x float> %a0, %a1 @@ -157,16 +169,18 @@ ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: orps %xmm3, %xmm2 -; SSE-NEXT: movmskps %xmm2, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm2, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vmovmskps %ymm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskps %ymm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -174,8 +188,9 @@ ; AVX512-LABEL: test_v8f32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: vmovmskps %ymm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskps %ymm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -197,8 +212,9 @@ ; SSE-NEXT: cmpltps %xmm1, %xmm3 ; SSE-NEXT: cmpltps %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: pmovmskb %xmm2, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -207,8 +223,9 @@ ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: negl %eax +; 
AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -217,8 +234,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0 ; AVX512-NEXT: vpmovm2w %k0, %xmm0 -; AVX512-NEXT: vpmovmskb %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpmovmskb %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -239,24 +257,27 @@ ; SSE-LABEL: test_v2i64_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskpd %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskpd %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2i64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovmskpd %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskpd %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: retq %c = icmp sgt <2 x i64> %a0, %a1 @@ -273,8 +294,9 @@ ; SSE-NEXT: pcmpgtq %xmm3, %xmm1 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskpd %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; @@ -285,8 +307,9 @@ ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskpd %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vmovmskpd %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; 
AVX1-NEXT: negl %ecx ; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -294,8 +317,9 @@ ; AVX2-LABEL: test_v4i64_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vmovmskpd %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: negl %ecx ; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -303,8 +327,9 @@ ; AVX512-LABEL: test_v4i64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vmovmskpd %ymm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskpd %ymm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -324,8 +349,9 @@ ; SSE-NEXT: pcmpgtq %xmm3, %xmm1 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbq %rax, %rax ; SSE-NEXT: retq ; @@ -336,8 +362,9 @@ ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vmovmskps %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: negl %ecx ; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -347,8 +374,9 @@ ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovmskps %xmm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vmovmskps %xmm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: negl %ecx ; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -358,8 +386,9 @@ ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: 
vmovmskps %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskps %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -378,24 +407,27 @@ ; SSE-LABEL: test_v4i32_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vmovmskps %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4i32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovmskps %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskps %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: retq %c = icmp sgt <4 x i32> %a0, %a1 @@ -414,8 +446,9 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: movmskps %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -426,8 +459,9 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vmovmskps %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: negl %ecx ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -435,8 +469,9 @@ ; AVX2-LABEL: test_v8i32_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vmovmskps %ymm0, 
%ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: negl %ecx ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -444,8 +479,9 @@ ; AVX512-LABEL: test_v8i32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vmovmskps %ymm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vmovmskps %ymm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -467,8 +503,9 @@ ; SSE-NEXT: pcmpgtd %xmm3, %xmm1 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: retq ; @@ -479,8 +516,9 @@ ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: negl %ecx ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -490,8 +528,9 @@ ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb %xmm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: negl %ecx ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -500,8 +539,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; AVX512-NEXT: vpmovm2w %k0, %xmm0 -; AVX512-NEXT: vpmovmskb %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpmovmskb %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -522,8 +562,9 @@ ; SSE-LABEL: test_v8i16_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE-NEXT: 
pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -531,8 +572,9 @@ ; AVX-LABEL: test_v8i16_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -540,8 +582,9 @@ ; AVX512-LABEL: test_v8i16_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpmovmskb %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpmovmskb %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq @@ -563,8 +606,9 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -576,8 +620,9 @@ ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: negl %ecx ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -586,8 +631,9 @@ ; AVX2-LABEL: test_v16i16_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: negl %ecx ; AVX2-NEXT: sbbl %eax, %eax ; 
AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -596,8 +642,9 @@ ; AVX512-LABEL: test_v16i16_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovmskb %ymm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpmovmskb %ymm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper @@ -622,8 +669,9 @@ ; SSE-NEXT: pcmpgtw %xmm3, %xmm1 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $ax killed $ax killed $eax ; SSE-NEXT: retq @@ -635,8 +683,9 @@ ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: negl %ecx ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -645,8 +694,9 @@ ; AVX2-LABEL: test_v16i16_legal_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb %ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: negl %ecx ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -656,8 +706,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ; AVX512-NEXT: vpmovm2b %k0, %xmm0 -; AVX512-NEXT: vpmovmskb %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpmovmskb %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper @@ -681,8 +732,9 
@@ ; SSE-LABEL: test_v16i8_sext: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -690,8 +742,9 @@ ; AVX-LABEL: test_v16i8_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: negl %eax +; AVX-NEXT: vpmovmskb %xmm0, %ecx +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: negl %ecx ; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq @@ -699,8 +752,9 @@ ; AVX512-LABEL: test_v16i8_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpmovmskb %xmm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpmovmskb %xmm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq @@ -724,8 +778,9 @@ ; SSE-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: negl %eax +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: negl %ecx ; SSE-NEXT: sbbl %eax, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -737,8 +792,9 @@ ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: negl %ecx ; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -747,8 +803,9 @@ ; AVX2-LABEL: test_v32i8_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpmovmskb 
%ymm0, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: negl %ecx ; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -757,8 +814,9 @@ ; AVX512-LABEL: test_v32i8_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovmskb %ymm0, %eax -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpmovmskb %ymm0, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: negl %ecx ; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper