Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -27769,8 +27769,11 @@ // ... // TrueVal = ... // cmpTY ccX, r1, r2 - // bCC copy1MBB + // bCC TrueMBB // fallthrough --> FalseMBB + // jmp SinkMBB + // TrueMBB: + // fallthrough --> SinkMBB // This code lowers all pseudo-CMOV instructions. Generally it lowers these // as described above, by inserting a BB, and then making a PHI at the join @@ -27840,10 +27843,12 @@ const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); MachineFunction *F = ThisMBB->getParent(); MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++ThisMBB->getIterator(); F->insert(It, FalseMBB); + F->insert(It, TrueMBB); F->insert(It, SinkMBB); // If the EFLAGS register isn't dead in the terminator, then claim that it's @@ -27852,6 +27857,7 @@ if (!LastCMOV->killsRegister(X86::EFLAGS) && !checkAndUpdateEFLAGSKill(LastCMOV, ThisMBB, TRI)) { FalseMBB->addLiveIn(X86::EFLAGS); + TrueMBB->addLiveIn(X86::EFLAGS); SinkMBB->addLiveIn(X86::EFLAGS); } @@ -27863,22 +27869,27 @@ // Fallthrough block for ThisMBB. ThisMBB->addSuccessor(FalseMBB); - // The true block target of the first (or only) branch is always a SinkMBB. - ThisMBB->addSuccessor(SinkMBB); - // Fallthrough block for FalseMBB. + // The true block target of the first (or only) branch is always a TrueMBB. + ThisMBB->addSuccessor(TrueMBB); + // Jump target block for FalseMBB. FalseMBB->addSuccessor(SinkMBB); + // Fallthrough target block for TrueMBB. + TrueMBB->addSuccessor(SinkMBB); // Create the conditional branch instruction. unsigned Opc = X86::GetCondBranchFromCond(CC); - BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB); + BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(TrueMBB); + + // Create the unconditional branch for FalseMBB. + BuildMI(FalseMBB, DL, TII->get(X86::JMP_1)).addMBB(SinkMBB); // SinkMBB: - // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ] + // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, TrueMBB ] // ... MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); MachineBasicBlock::iterator MIItEnd = std::next(MachineBasicBlock::iterator(LastCMOV)); - createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, ThisMBB, FalseMBB, SinkMBB); + createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, TrueMBB, FalseMBB, SinkMBB); // Now remove the CMOV(s). ThisMBB->erase(MIItBegin, MIItEnd); Index: test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll =================================================================== --- test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll +++ test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll @@ -10,12 +10,13 @@ define <4 x i32> @test(<4 x i32>* %p) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps (%rdi), %xmm0 -; CHECK-NEXT: extractps $2, %xmm0, %eax +; CHECK-NEXT: movaps (%rdi), %xmm1 +; CHECK-NEXT: extractps $2, %xmm1, %eax ; CHECK-NEXT: cmpl $3, %eax -; CHECK-NEXT: je .LBB0_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: retq %v = load <4 x i32>, <4 x i32>* %p Index: test/CodeGen/X86/MachineSink-eflags.ll =================================================================== --- test/CodeGen/X86/MachineSink-eflags.ll +++ test/CodeGen/X86/MachineSink-eflags.ll @@ -26,20 +26,19 @@ ; CHECK-NEXT: shlq $4, %rdx ; CHECK-NEXT: movaps (%rax,%rdx), %xmm0 ; CHECK-NEXT: cmpl $0, (%rdi) -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: movaps (%rax,%rcx), %xmm1 -; CHECK-NEXT: .LBB0_3: # %entry +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movaps (%rax,%rcx), %xmm2 +; CHECK-NEXT: .LBB0_2: # %entry ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rsp +; CHECK-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: .LBB0_4: # %entry ; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: jne .LBB0_5 -; CHECK-NEXT: # %bb.4: # %entry -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: .LBB0_5: # %entry -; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: addq $152, %rsp ; CHECK-NEXT: retq entry: Index: test/CodeGen/X86/atomic32.ll =================================================================== --- test/CodeGen/X86/atomic32.ll +++ test/CodeGen/X86/atomic32.ll @@ -322,7 +322,6 @@ ; X86-NOCMOV-LABEL: atomic_fetch_max32: ; X86-NOCMOV: # %bb.0: ; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $24, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOCMOV-NEXT: movl sc32, %ecx @@ -334,18 +333,21 @@ ; X86-NOCMOV-NEXT: movl %eax, %ecx ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jge .LBB6_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: jmp .LBB6_5 ; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: .LBB6_5: # %atomicrmw.start +; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, %eax @@ -358,7 +360,6 @@ ; X86-NOCMOV-NEXT: jmp .LBB6_1 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire @@ -418,7 +419,6 @@ ; X86-NOCMOV-LABEL: atomic_fetch_min32: ; X86-NOCMOV: # %bb.0: ; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $24, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOCMOV-NEXT: movl sc32, %ecx @@ -430,18 +430,21 @@ ; X86-NOCMOV-NEXT: movl %eax, %ecx ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jle .LBB7_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: jmp .LBB7_5 ; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: .LBB7_5: # %atomicrmw.start +; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, %eax @@ -454,7 +457,6 @@ ; X86-NOCMOV-NEXT: jmp .LBB7_1 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire @@ -514,7 +516,6 @@ ; X86-NOCMOV-LABEL: atomic_fetch_umax32: ; X86-NOCMOV: # %bb.0: ; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $24, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOCMOV-NEXT: movl sc32, %ecx @@ -526,18 +527,21 @@ ; X86-NOCMOV-NEXT: movl %eax, %ecx ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: ja .LBB8_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: jmp .LBB8_5 ; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: .LBB8_5: # %atomicrmw.start +; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, %eax @@ -550,7 +554,6 @@ ; X86-NOCMOV-NEXT: jmp .LBB8_1 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire @@ -610,7 +613,6 @@ ; X86-NOCMOV-LABEL: atomic_fetch_umin32: ; X86-NOCMOV: # %bb.0: ; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $24, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOCMOV-NEXT: movl sc32, %ecx @@ -622,18 +624,21 @@ ; X86-NOCMOV-NEXT: movl %eax, %ecx ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jbe .LBB9_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: jmp .LBB9_5 ; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: .LBB9_5: # %atomicrmw.start +; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %ecx, %eax @@ -646,7 +651,6 @@ ; X86-NOCMOV-NEXT: jmp .LBB9_1 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire Index: test/CodeGen/X86/avx-select.ll =================================================================== --- test/CodeGen/X86/avx-select.ll +++ test/CodeGen/X86/avx-select.ll @@ -6,10 +6,10 @@ ; X86-LABEL: select00: ; X86: # %bb.0: ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp) -; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X86-NEXT: je .LBB0_2 -; X86-NEXT: # %bb.1: ; X86-NEXT: vmovaps %ymm0, %ymm1 +; X86-NEXT: jne .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X86-NEXT: .LBB0_2: ; X86-NEXT: vxorps %ymm1, %ymm0, %ymm0 ; X86-NEXT: retl @@ -17,10 +17,10 @@ ; X64-LABEL: select00: ; X64: # %bb.0: ; X64-NEXT: cmpl $255, %edi -; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: je .LBB0_2 -; X64-NEXT: # %bb.1: ; X64-NEXT: vmovaps %ymm0, %ymm1 +; X64-NEXT: jne .LBB0_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: .LBB0_2: ; X64-NEXT: vxorps %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq @@ -34,10 +34,10 @@ ; X86-LABEL: select01: ; X86: # %bb.0: ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp) -; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X86-NEXT: je .LBB1_2 -; X86-NEXT: # %bb.1: ; X86-NEXT: vmovaps %ymm0, %ymm1 +; X86-NEXT: jne .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X86-NEXT: .LBB1_2: ; X86-NEXT: vxorps %ymm1, %ymm0, %ymm0 ; X86-NEXT: retl @@ -45,10 +45,10 @@ ; X64-LABEL: select01: ; X64: # %bb.0: ; X64-NEXT: cmpl $255, %edi -; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: je .LBB1_2 -; X64-NEXT: # %bb.1: ; X64-NEXT: vmovaps %ymm0, %ymm1 +; X64-NEXT: jne .LBB1_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: .LBB1_2: ; X64-NEXT: vxorps %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -234,19 +234,21 @@ ; ALL_X64-LABEL: test8: ; ALL_X64: ## %bb.0: ; ALL_X64-NEXT: testb $1, %dil -; ALL_X64-NEXT: jne LBB8_2 +; ALL_X64-NEXT: je LBB8_2 ; ALL_X64-NEXT: ## %bb.1: -; ALL_X64-NEXT: vmovaps %xmm1, %xmm0 +; ALL_X64-NEXT: vmovaps %xmm0, %xmm1 ; ALL_X64-NEXT: LBB8_2: +; ALL_X64-NEXT: vmovaps %xmm1, %xmm0 ; ALL_X64-NEXT: retq ; ; KNL_X32-LABEL: test8: ; KNL_X32: ## %bb.0: ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) -; KNL_X32-NEXT: jne LBB8_2 +; KNL_X32-NEXT: je LBB8_2 ; KNL_X32-NEXT: ## %bb.1: -; KNL_X32-NEXT: vmovaps %xmm1, %xmm0 +; KNL_X32-NEXT: vmovaps %xmm0, %xmm1 ; KNL_X32-NEXT: LBB8_2: +; KNL_X32-NEXT: vmovaps %xmm1, %xmm0 ; KNL_X32-NEXT: retl %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2 ret <16 x i8> %res Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -671,14 +671,12 @@ ; KNL-LABEL: test8: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: jg LBB17_1 -; KNL-NEXT: ## %bb.2: ; KNL-NEXT: kxorw %k0, %k0, %k1 -; KNL-NEXT: jmp LBB17_3 -; KNL-NEXT: LBB17_1: +; KNL-NEXT: jle LBB17_2 +; KNL-NEXT: ## %bb.1: ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; KNL-NEXT: LBB17_3: +; KNL-NEXT: LBB17_2: ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vzeroupper @@ -687,15 +685,12 @@ ; SKX-LABEL: test8: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: jg LBB17_1 -; SKX-NEXT: ## %bb.2: ; SKX-NEXT: kxorw %k0, %k0, %k0 -; SKX-NEXT: vpmovm2b %k0, %xmm0 -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; SKX-NEXT: LBB17_1: +; SKX-NEXT: jle LBB17_2 +; SKX-NEXT: ## %bb.1: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; SKX-NEXT: LBB17_2: ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -703,16 +698,12 @@ ; AVX512BW-LABEL: test8: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: jg LBB17_1 -; AVX512BW-NEXT: ## %bb.2: ; AVX512BW-NEXT: kxorw %k0, %k0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 -; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; AVX512BW-NEXT: LBB17_1: +; AVX512BW-NEXT: jle LBB17_2 +; AVX512BW-NEXT: ## %bb.1: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: LBB17_2: ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper @@ -721,14 +712,12 @@ ; AVX512DQ-LABEL: test8: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: jg LBB17_1 -; AVX512DQ-NEXT: ## %bb.2: ; AVX512DQ-NEXT: kxorw %k0, %k0, %k0 -; AVX512DQ-NEXT: jmp LBB17_3 -; AVX512DQ-NEXT: LBB17_1: +; AVX512DQ-NEXT: jle LBB17_2 +; AVX512DQ-NEXT: ## %bb.1: ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; AVX512DQ-NEXT: LBB17_3: +; AVX512DQ-NEXT: LBB17_2: ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper @@ -738,15 +727,12 @@ ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jg LBB17_1 -; X86-NEXT: ## %bb.2: ; X86-NEXT: kxorw %k0, %k0, %k0 -; X86-NEXT: vpmovm2b %k0, %xmm0 -; X86-NEXT: vzeroupper -; X86-NEXT: retl -; X86-NEXT: LBB17_1: +; X86-NEXT: jle LBB17_2 +; X86-NEXT: ## %bb.1: ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; X86-NEXT: LBB17_2: ; X86-NEXT: vpmovm2b %k0, %xmm0 ; X86-NEXT: vzeroupper ; X86-NEXT: retl @@ -761,11 +747,11 @@ ; KNL-LABEL: test9: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: jg LBB18_1 -; KNL-NEXT: ## %bb.2: +; KNL-NEXT: jg LBB18_2 +; KNL-NEXT: ## %bb.1: ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 ; KNL-NEXT: jmp LBB18_3 -; KNL-NEXT: LBB18_1: +; KNL-NEXT: LBB18_2: ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: LBB18_3: ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 @@ -778,11 +764,11 @@ ; SKX-LABEL: test9: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: jg LBB18_1 -; SKX-NEXT: ## %bb.2: +; SKX-NEXT: jg LBB18_2 +; SKX-NEXT: ## %bb.1: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 ; SKX-NEXT: jmp LBB18_3 -; SKX-NEXT: LBB18_1: +; SKX-NEXT: LBB18_2: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ; SKX-NEXT: LBB18_3: ; SKX-NEXT: vpmovb2m %xmm0, %k0 @@ -792,11 +778,11 @@ ; AVX512BW-LABEL: test9: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: jg LBB18_1 -; AVX512BW-NEXT: ## %bb.2: +; AVX512BW-NEXT: jg LBB18_2 +; AVX512BW-NEXT: ## %bb.1: ; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0 ; AVX512BW-NEXT: jmp LBB18_3 -; AVX512BW-NEXT: LBB18_1: +; AVX512BW-NEXT: LBB18_2: ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX512BW-NEXT: LBB18_3: ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 @@ -808,11 +794,11 @@ ; AVX512DQ-LABEL: test9: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: jg LBB18_1 -; AVX512DQ-NEXT: ## %bb.2: +; AVX512DQ-NEXT: jg LBB18_2 +; AVX512DQ-NEXT: ## %bb.1: ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 ; AVX512DQ-NEXT: jmp LBB18_3 -; AVX512DQ-NEXT: LBB18_1: +; AVX512DQ-NEXT: LBB18_2: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: LBB18_3: ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 @@ -826,11 +812,11 @@ ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jg LBB18_1 -; X86-NEXT: ## %bb.2: +; X86-NEXT: jg LBB18_2 +; X86-NEXT: ## %bb.1: ; X86-NEXT: vpsllw $7, %xmm1, %xmm0 ; X86-NEXT: jmp LBB18_3 -; X86-NEXT: LBB18_1: +; X86-NEXT: LBB18_2: ; X86-NEXT: vpsllw $7, %xmm0, %xmm0 ; X86-NEXT: LBB18_3: ; X86-NEXT: vpmovb2m %xmm0, %k0 @@ -849,11 +835,11 @@ ; KNL-LABEL: test11: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: jg LBB20_1 -; KNL-NEXT: ## %bb.2: +; KNL-NEXT: jg LBB20_2 +; KNL-NEXT: ## %bb.1: ; KNL-NEXT: vpslld $31, %xmm1, %xmm0 ; KNL-NEXT: jmp LBB20_3 -; KNL-NEXT: LBB20_1: +; KNL-NEXT: LBB20_2: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: LBB20_3: ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 @@ -865,11 +851,11 @@ ; SKX-LABEL: test11: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: jg LBB20_1 -; SKX-NEXT: ## %bb.2: +; SKX-NEXT: jg LBB20_2 +; SKX-NEXT: ## %bb.1: ; SKX-NEXT: vpslld $31, %xmm1, %xmm0 ; SKX-NEXT: jmp LBB20_3 -; SKX-NEXT: LBB20_1: +; SKX-NEXT: LBB20_2: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: LBB20_3: ; SKX-NEXT: vpmovd2m %xmm0, %k0 @@ -879,11 +865,11 @@ ; AVX512BW-LABEL: test11: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: jg LBB20_1 -; AVX512BW-NEXT: ## %bb.2: +; AVX512BW-NEXT: jg LBB20_2 +; AVX512BW-NEXT: ## %bb.1: ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0 ; AVX512BW-NEXT: jmp LBB20_3 -; AVX512BW-NEXT: LBB20_1: +; AVX512BW-NEXT: LBB20_2: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: LBB20_3: ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1 @@ -895,11 +881,11 @@ ; AVX512DQ-LABEL: test11: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: jg LBB20_1 -; AVX512DQ-NEXT: ## %bb.2: +; AVX512DQ-NEXT: jg LBB20_2 +; AVX512DQ-NEXT: ## %bb.1: ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0 ; AVX512DQ-NEXT: jmp LBB20_3 -; AVX512DQ-NEXT: LBB20_1: +; AVX512DQ-NEXT: LBB20_2: ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: LBB20_3: ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 @@ -912,11 +898,11 @@ ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jg LBB20_1 -; X86-NEXT: ## %bb.2: +; X86-NEXT: jg LBB20_2 +; X86-NEXT: ## %bb.1: ; X86-NEXT: vpslld $31, %xmm1, %xmm0 ; X86-NEXT: jmp LBB20_3 -; X86-NEXT: LBB20_1: +; X86-NEXT: LBB20_2: ; X86-NEXT: vpslld $31, %xmm0, %xmm0 ; X86-NEXT: LBB20_3: ; X86-NEXT: vpmovd2m %xmm0, %k0 Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -7056,32 +7056,26 @@ define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; GENERIC-LABEL: vcmp_test8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00] -; GENERIC-NEXT: # %bb.2: ; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; GENERIC-NEXT: .LBB386_1: +; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] +; GENERIC-NEXT: jle .LBB386_2 # sched: [1:1.00] +; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] ; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50] +; GENERIC-NEXT: .LBB386_2: ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vcmp_test8: ; SKX: # %bb.0: -; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50] -; SKX-NEXT: # %bb.2: ; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; SKX-NEXT: .LBB386_1: +; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] +; SKX-NEXT: jle .LBB386_2 # sched: [1:0.50] +; SKX-NEXT: # %bb.1: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: .LBB386_2: ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] ; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] @@ -7096,11 +7090,11 @@ ; GENERIC-LABEL: vpmov_test9: ; GENERIC: # %bb.0: ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: jg .LBB387_1 # sched: [1:1.00] -; GENERIC-NEXT: # %bb.2: +; GENERIC-NEXT: jg .LBB387_2 # sched: [1:1.00] +; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: jmp .LBB387_3 # sched: [1:1.00] -; GENERIC-NEXT: .LBB387_1: +; GENERIC-NEXT: .LBB387_2: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: .LBB387_3: ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] @@ -7110,11 +7104,11 @@ ; SKX-LABEL: vpmov_test9: ; SKX: # %bb.0: ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: jg .LBB387_1 # sched: [1:0.50] -; SKX-NEXT: # %bb.2: +; SKX-NEXT: jg .LBB387_2 # sched: [1:0.50] +; SKX-NEXT: # %bb.1: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50] ; SKX-NEXT: jmp .LBB387_3 # sched: [1:0.50] -; SKX-NEXT: .LBB387_1: +; SKX-NEXT: .LBB387_2: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: .LBB387_3: ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] @@ -7133,11 +7127,11 @@ ; GENERIC-LABEL: vmov_test11: ; GENERIC: # %bb.0: ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: jg .LBB389_1 # sched: [1:1.00] -; GENERIC-NEXT: # %bb.2: +; GENERIC-NEXT: jg .LBB389_2 # sched: [1:1.00] +; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: jmp .LBB389_3 # sched: [1:1.00] -; GENERIC-NEXT: .LBB389_1: +; GENERIC-NEXT: .LBB389_2: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: .LBB389_3: ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] @@ -7147,11 +7141,11 @@ ; SKX-LABEL: vmov_test11: ; SKX: # %bb.0: ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: jg .LBB389_1 # sched: [1:0.50] -; SKX-NEXT: # %bb.2: +; SKX-NEXT: jg .LBB389_2 # sched: [1:0.50] +; SKX-NEXT: # %bb.1: ; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50] ; SKX-NEXT: jmp .LBB389_3 # sched: [1:0.50] -; SKX-NEXT: .LBB389_1: +; SKX-NEXT: .LBB389_2: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: .LBB389_3: ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] Index: test/CodeGen/X86/avx512-select.ll =================================================================== --- test/CodeGen/X86/avx512-select.ll +++ test/CodeGen/X86/avx512-select.ll @@ -6,10 +6,10 @@ ; X86-LABEL: select00: ; X86: # %bb.0: ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp) -; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86-NEXT: je .LBB0_2 -; X86-NEXT: # %bb.1: ; X86-NEXT: vmovdqa64 %zmm0, %zmm1 +; X86-NEXT: jne .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: .LBB0_2: ; X86-NEXT: vpxord %zmm1, %zmm0, %zmm0 ; X86-NEXT: retl @@ -17,10 +17,10 @@ ; X64-LABEL: select00: ; X64: # %bb.0: ; X64-NEXT: cmpl $255, %edi -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-NEXT: je .LBB0_2 -; X64-NEXT: # %bb.1: ; X64-NEXT: vmovdqa64 %zmm0, %zmm1 +; X64-NEXT: jne .LBB0_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: .LBB0_2: ; X64-NEXT: vpxord %zmm1, %zmm0, %zmm0 ; X64-NEXT: retq @@ -34,10 +34,10 @@ ; X86-LABEL: select01: ; X86: # %bb.0: ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp) -; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86-NEXT: je .LBB1_2 -; X86-NEXT: # %bb.1: ; X86-NEXT: vmovdqa64 %zmm0, %zmm1 +; X86-NEXT: jne .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: .LBB1_2: ; X86-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; X86-NEXT: retl @@ -45,10 +45,10 @@ ; X64-LABEL: select01: ; X64: # %bb.0: ; X64-NEXT: cmpl $255, %edi -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-NEXT: je .LBB1_2 -; X64-NEXT: # %bb.1: ; X64-NEXT: vmovdqa64 %zmm0, %zmm1 +; X64-NEXT: jne .LBB1_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: .LBB1_2: ; X64-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; X64-NEXT: retq Index: test/CodeGen/X86/btc_bts_btr.ll =================================================================== --- test/CodeGen/X86/btc_bts_btr.ll +++ test/CodeGen/X86/btc_bts_btr.ll @@ -123,7 +123,7 @@ ret i32 %2 } -define i64 @btr_64(i64 %x, i64 %n) { +define i64 @btr_64(i64 %x, i64 %n) nounwind { ; X64-LABEL: btr_64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax @@ -132,21 +132,26 @@ ; ; X86-LABEL: btr_64: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB6_2 +; X86-NEXT: jne .LBB6_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %eax +; X86-NEXT: jmp .LBB6_3 ; X86-NEXT: .LBB6_2: +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB6_3: ; X86-NEXT: notl %edx ; X86-NEXT: notl %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl %1 = shl i64 1, %n %2 = xor i64 %1, -1 @@ -154,7 +159,7 @@ ret i64 %3 } -define i64 @bts_64(i64 %x, i64 %n) { +define i64 @bts_64(i64 %x, i64 %n) nounwind { ; X64-LABEL: bts_64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax @@ -163,26 +168,31 @@ ; ; X86-LABEL: bts_64: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB7_2 +; X86-NEXT: jne .LBB7_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %eax +; X86-NEXT: jmp .LBB7_3 ; X86-NEXT: .LBB7_2: +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB7_3: ; X86-NEXT: orl {{[0-9]+}}(%esp), %edx ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl %1 = shl i64 1, %n %2 = or i64 %x, %1 ret i64 %2 } -define i64 @btc_64(i64 %x, i64 %n) { +define i64 @btc_64(i64 %x, i64 %n) nounwind { ; X64-LABEL: btc_64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax @@ -191,19 +201,24 @@ ; ; X86-LABEL: btc_64: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB8_2 +; X86-NEXT: jne .LBB8_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %eax +; X86-NEXT: jmp .LBB8_3 ; X86-NEXT: .LBB8_2: +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB8_3: ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl %1 = shl i64 1, %n %2 = xor i64 %x, %1 @@ -340,7 +355,7 @@ ret i32 %3 } -define i64 @btr_64_mask(i64 %x, i64 %n) { +define i64 @btr_64_mask(i64 %x, i64 %n) nounwind { ; X64-LABEL: btr_64_mask: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax @@ -349,21 +364,26 @@ ; ; X86-LABEL: btr_64_mask: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB15_2 +; X86-NEXT: jne .LBB15_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %eax +; X86-NEXT: jmp .LBB15_3 ; X86-NEXT: .LBB15_2: +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB15_3: ; X86-NEXT: notl %edx ; X86-NEXT: notl %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl %1 = and i64 %n, 63 %2 = shl i64 1, %1 @@ -372,7 +392,7 @@ ret i64 %4 } -define i64 @bts_64_mask(i64 %x, i64 %n) { +define i64 @bts_64_mask(i64 %x, i64 %n) nounwind { ; X64-LABEL: bts_64_mask: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax @@ -381,19 +401,24 @@ ; ; X86-LABEL: bts_64_mask: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB16_2 +; X86-NEXT: jne .LBB16_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %eax +; X86-NEXT: jmp .LBB16_3 ; X86-NEXT: .LBB16_2: +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB16_3: ; X86-NEXT: orl {{[0-9]+}}(%esp), %edx ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl %1 = and i64 %n, 63 %2 = shl i64 1, %1 @@ -401,7 +426,7 @@ ret i64 %3 } -define i64 @btc_64_mask(i64 %x, i64 %n) { +define i64 @btc_64_mask(i64 %x, i64 %n) nounwind { ; X64-LABEL: btc_64_mask: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax @@ -410,19 +435,24 @@ ; ; X86-LABEL: btc_64_mask: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB17_2 +; X86-NEXT: jne .LBB17_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %eax +; X86-NEXT: jmp .LBB17_3 ; X86-NEXT: .LBB17_2: +; X86-NEXT: movl %esi, %edx +; X86-NEXT: .LBB17_3: ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl %1 = and i64 %n, 63 %2 = shl i64 1, %1 @@ -566,7 +596,7 @@ ret i32 %3 } -define i64 @btr_64_load(i64* %x, i64 %n) { +define i64 @btr_64_load(i64* %x, i64 %n) nounwind { ; X64-LABEL: btr_64_load: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rax @@ -575,27 +605,29 @@ ; ; X86-LABEL: btr_64_load: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %edi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %edi, %edx +; X86-NEXT: shll %cl, %edi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB24_2 +; X86-NEXT: jne .LBB24_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %edi, %eax +; X86-NEXT: jmp .LBB24_3 ; X86-NEXT: .LBB24_2: +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB24_3: ; X86-NEXT: notl %edx ; X86-NEXT: notl %eax ; X86-NEXT: andl 4(%esi), %edx ; X86-NEXT: andl (%esi), %eax ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: popl %edi ; X86-NEXT: retl %1 = load i64, i64* %x %2 = shl i64 1, %n @@ -604,7 +636,7 @@ ret i64 %4 } -define i64 @bts_64_load(i64* %x, i64 %n) { +define i64 @bts_64_load(i64* %x, i64 %n) nounwind { ; X64-LABEL: bts_64_load: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rax @@ -613,25 +645,27 @@ ; ; X86-LABEL: bts_64_load: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %edi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %edi, %edx +; X86-NEXT: shll %cl, %edi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB25_2 +; X86-NEXT: jne .LBB25_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %edi, %eax +; X86-NEXT: jmp .LBB25_3 ; X86-NEXT: .LBB25_2: +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB25_3: ; X86-NEXT: orl 4(%esi), %edx ; X86-NEXT: orl (%esi), %eax ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: popl %edi ; X86-NEXT: retl %1 = load i64, i64* %x %2 = shl i64 1, %n @@ -648,24 +682,32 @@ ; ; X86-LABEL: btc_64_load: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_offset %esi, -12 +; X86-NEXT: .cfi_offset %edi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %edi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %edi, %edx +; X86-NEXT: shll %cl, %edi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB26_2 +; X86-NEXT: jne .LBB26_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %edi, %eax +; X86-NEXT: jmp .LBB26_3 ; X86-NEXT: .LBB26_2: +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB26_3: ; X86-NEXT: xorl 4(%esi), %edx ; X86-NEXT: xorl (%esi), %eax ; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %edi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl %1 = load i64, i64* %x @@ -830,7 +872,7 @@ ret void } -define void @btr_64_dont_fold(i64* %x, i64 %n) { +define void @btr_64_dont_fold(i64* %x, i64 %n) nounwind { ; X64-LABEL: btr_64_dont_fold: ; X64: # %bb.0: ; X64-NEXT: movq %rsi, %rcx @@ -842,27 +884,29 @@ ; ; X86-LABEL: btr_64_dont_fold: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %edx +; X86-NEXT: movl $1, %edi +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: xorl %esi, %esi -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: shll %cl, %edx +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: shll %cl, %edi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB33_2 +; X86-NEXT: jne .LBB33_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %edx, %esi -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %edi, %edx +; X86-NEXT: jmp .LBB33_3 ; X86-NEXT: .LBB33_2: +; X86-NEXT: movl %edi, %esi +; X86-NEXT: .LBB33_3: ; X86-NEXT: notl %esi ; X86-NEXT: notl %edx ; X86-NEXT: andl %esi, 4(%eax) ; X86-NEXT: andl %edx, (%eax) ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: popl %edi ; X86-NEXT: retl %1 = load i64, i64* %x %2 = shl i64 1, %n @@ -872,7 +916,7 @@ ret void } -define void @bts_64_dont_fold(i64* %x, i64 %n) { +define void @bts_64_dont_fold(i64* %x, i64 %n) nounwind { ; X64-LABEL: bts_64_dont_fold: ; X64: # %bb.0: ; X64-NEXT: movq %rsi, %rcx @@ -884,25 +928,27 @@ ; ; X86-LABEL: bts_64_dont_fold: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %edx +; X86-NEXT: movl $1, %edi +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: xorl %esi, %esi -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: shll %cl, %edx +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: shll %cl, %edi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB34_2 +; X86-NEXT: jne .LBB34_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %edx, %esi -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %edi, %edx +; X86-NEXT: jmp .LBB34_3 ; X86-NEXT: .LBB34_2: +; X86-NEXT: movl %edi, %esi +; X86-NEXT: .LBB34_3: ; X86-NEXT: orl %esi, 4(%eax) ; X86-NEXT: orl %edx, (%eax) ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: popl %edi ; X86-NEXT: retl %1 = load i64, i64* %x %2 = shl i64 1, %n @@ -923,24 +969,32 @@ ; ; X86-LABEL: btc_64_dont_fold: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_offset %esi, -12 +; X86-NEXT: .cfi_offset %edi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl $1, %edx +; X86-NEXT: movl $1, %edi +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: xorl %esi, %esi -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: shll %cl, %edx +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: shll %cl, %edi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB35_2 +; X86-NEXT: jne .LBB35_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %edx, %esi -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %edi, %edx +; X86-NEXT: jmp .LBB35_3 ; X86-NEXT: .LBB35_2: +; X86-NEXT: movl %edi, %esi +; X86-NEXT: .LBB35_3: ; X86-NEXT: xorl %esi, 4(%eax) ; X86-NEXT: xorl %edx, (%eax) ; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %edi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl %1 = load i64, i64* %x Index: test/CodeGen/X86/clear-highbits.ll =================================================================== --- test/CodeGen/X86/clear-highbits.ll +++ test/CodeGen/X86/clear-highbits.ll @@ -879,25 +879,25 @@ ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: pushl %eax ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %esi ; X86-NOBMI2-NEXT: movl $-1, %edi -; X86-NOBMI2-NEXT: shrl %cl, %edi -; X86-NOBMI2-NEXT: shrdl %cl, %esi, %esi +; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: shrl %cl, %esi +; X86-NOBMI2-NEXT: shrdl %cl, %edi, %edi ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB19_2 ; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edi, %esi -; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi ; X86-NOBMI2-NEXT: .LBB19_2: ; X86-NOBMI2-NEXT: subl $8, %esp -; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: calll use64 ; X86-NOBMI2-NEXT: addl $16, %esp -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-NOBMI2-NEXT: movl %esi, %eax -; X86-NOBMI2-NEXT: movl %edi, %edx +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl %edi, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx ; X86-NOBMI2-NEXT: addl $4, %esp ; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: popl %edi Index: test/CodeGen/X86/clz.ll =================================================================== --- test/CodeGen/X86/clz.ll +++ test/CodeGen/X86/clz.ll @@ -99,13 +99,13 @@ ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: testl %eax, %eax -; X32-NEXT: jne .LBB3_1 -; X32-NEXT: # %bb.2: +; X32-NEXT: jne .LBB3_2 +; X32-NEXT: # %bb.1: ; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax ; X32-NEXT: addl $32, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: .LBB3_1: +; X32-NEXT: .LBB3_2: ; X32-NEXT: bsfl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl @@ -119,13 +119,13 @@ ; X32-CLZ: # %bb.0: ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: testl %eax, %eax -; X32-CLZ-NEXT: jne .LBB3_1 -; X32-CLZ-NEXT: # %bb.2: +; X32-CLZ-NEXT: jne .LBB3_2 +; X32-CLZ-NEXT: # %bb.1: ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl -; X32-CLZ-NEXT: .LBB3_1: +; X32-CLZ-NEXT: .LBB3_2: ; X32-CLZ-NEXT: tzcntl %eax, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl @@ -233,14 +233,14 @@ ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: testl %eax, %eax -; X32-NEXT: jne .LBB7_1 -; X32-NEXT: # %bb.2: +; X32-NEXT: jne .LBB7_2 +; X32-NEXT: # %bb.1: ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl $31, %eax ; X32-NEXT: addl $32, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: .LBB7_1: +; X32-NEXT: .LBB7_2: ; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $31, %eax ; X32-NEXT: xorl %edx, %edx @@ -256,13 +256,13 @@ ; X32-CLZ: # %bb.0: ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: testl %eax, %eax -; X32-CLZ-NEXT: jne .LBB7_1 -; X32-CLZ-NEXT: # %bb.2: +; X32-CLZ-NEXT: jne .LBB7_2 +; X32-CLZ-NEXT: # %bb.1: ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl -; X32-CLZ-NEXT: .LBB7_1: +; X32-CLZ-NEXT: .LBB7_2: ; X32-CLZ-NEXT: lzcntl %eax, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl @@ -289,7 +289,7 @@ ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl ; X32-NEXT: .LBB8_1: -; X32-NEXT: movb $8, %al +; X32-NEXT: movb $8, %al ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl ; @@ -304,7 +304,7 @@ ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; X64-NEXT: .LBB8_1: -; X64-NEXT: movb $8, %al +; X64-NEXT: movb $8, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; @@ -340,7 +340,7 @@ ; X32-NEXT: # kill: def $ax killed $ax killed $eax ; X32-NEXT: retl ; X32-NEXT: .LBB9_1: -; X32-NEXT: movw $16, %ax +; X32-NEXT: movw $16, %ax ; X32-NEXT: # kill: def $ax killed $ax killed $eax ; X32-NEXT: retl ; @@ -383,7 +383,7 @@ ; X32-NEXT: xorl $31, %eax ; X32-NEXT: retl ; X32-NEXT: .LBB10_1: -; X32-NEXT: movl $32, %eax +; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; ; X64-LABEL: ctlz_i32_zero_test: @@ -416,20 +416,19 @@ ; X32-LABEL: ctlz_i64_zero_test: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: bsrl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl $63, %eax -; X32-NEXT: je .LBB11_2 +; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax +; X32-NEXT: jne .LBB11_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl $63, %eax ; X32-NEXT: .LBB11_2: ; X32-NEXT: testl %ecx, %ecx -; X32-NEXT: jne .LBB11_3 -; X32-NEXT: # %bb.4: +; X32-NEXT: jne .LBB11_4 +; X32-NEXT: # %bb.3: ; X32-NEXT: xorl $31, %eax ; X32-NEXT: addl $32, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: .LBB11_3: +; X32-NEXT: .LBB11_4: ; X32-NEXT: bsrl %ecx, %eax ; X32-NEXT: xorl $31, %eax ; X32-NEXT: xorl %edx, %edx @@ -451,13 +450,13 @@ ; X32-CLZ: # %bb.0: ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: testl %eax, %eax -; X32-CLZ-NEXT: jne .LBB11_1 -; X32-CLZ-NEXT: # %bb.2: +; X32-CLZ-NEXT: jne .LBB11_2 +; X32-CLZ-NEXT: # %bb.1: ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl -; X32-CLZ-NEXT: .LBB11_1: +; X32-CLZ-NEXT: .LBB11_2: ; X32-CLZ-NEXT: lzcntl %eax, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl @@ -482,7 +481,7 @@ ; X32-NEXT: bsfl %eax, %eax ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl -; X32-NEXT: .LBB12_1 +; X32-NEXT: .LBB12_1: ; X32-NEXT: movb $8, %al ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl @@ -530,7 +529,7 @@ ; X32-NEXT: # %bb.2: # %cond.false ; X32-NEXT: bsfw %ax, %ax ; X32-NEXT: retl -; X32-NEXT: .LBB13_1 +; X32-NEXT: .LBB13_1: ; X32-NEXT: movw $16, %ax ; X32-NEXT: retl ; @@ -568,7 +567,7 @@ ; X32-NEXT: # %bb.2: # %cond.false ; X32-NEXT: bsfl %eax, %eax ; X32-NEXT: retl -; X32-NEXT: .LBB14_1 +; X32-NEXT: .LBB14_1: ; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; @@ -601,19 +600,18 @@ ; X32-LABEL: cttz_i64_zero_test: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: bsfl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl $32, %eax -; X32-NEXT: je .LBB15_2 +; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax +; X32-NEXT: jne .LBB15_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl $32, %eax ; X32-NEXT: .LBB15_2: ; X32-NEXT: testl %ecx, %ecx -; X32-NEXT: jne .LBB15_3 -; X32-NEXT: # %bb.4: +; X32-NEXT: jne .LBB15_4 +; X32-NEXT: # %bb.3: ; X32-NEXT: addl $32, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: .LBB15_3: +; X32-NEXT: .LBB15_4: ; X32-NEXT: bsfl %ecx, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl @@ -633,13 +631,13 @@ ; X32-CLZ: # %bb.0: ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: testl %eax, %eax -; X32-CLZ-NEXT: jne .LBB15_1 -; X32-CLZ-NEXT: # %bb.2: +; X32-CLZ-NEXT: jne .LBB15_2 +; X32-CLZ-NEXT: # %bb.1: ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl -; X32-CLZ-NEXT: .LBB15_1: +; X32-CLZ-NEXT: .LBB15_2: ; X32-CLZ-NEXT: tzcntl %eax, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl @@ -667,7 +665,7 @@ ; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $31, %eax ; X32-NEXT: retl -; X32-NEXT: .LBB16_1 +; X32-NEXT: .LBB16_1: ; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; @@ -871,8 +869,8 @@ ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: testl %eax, %eax -; X32-NEXT: jne .LBB21_1 -; X32-NEXT: # %bb.2: +; X32-NEXT: jne .LBB21_2 +; X32-NEXT: # %bb.1: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: orl $1, %eax ; X32-NEXT: bsrl %eax, %eax @@ -880,7 +878,7 @@ ; X32-NEXT: orl $32, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: .LBB21_1: +; X32-NEXT: .LBB21_2: ; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $31, %eax ; X32-NEXT: xorl %edx, %edx @@ -902,15 +900,15 @@ ; X32-CLZ: # %bb.0: ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: testl %eax, %eax -; X32-CLZ-NEXT: jne .LBB21_1 -; X32-CLZ-NEXT: # %bb.2: +; X32-CLZ-NEXT: jne .LBB21_2 +; X32-CLZ-NEXT: # %bb.1: ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: orl $1, %eax ; X32-CLZ-NEXT: lzcntl %eax, %eax ; X32-CLZ-NEXT: orl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl -; X32-CLZ-NEXT: .LBB21_1: +; X32-CLZ-NEXT: .LBB21_2: ; X32-CLZ-NEXT: lzcntl %eax, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl @@ -932,15 +930,15 @@ ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: testl %eax, %eax -; X32-NEXT: jne .LBB22_1 -; X32-NEXT: # %bb.2: +; X32-NEXT: jne .LBB22_2 +; X32-NEXT: # %bb.1: ; X32-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X32-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-NEXT: bsfl %eax, %eax ; X32-NEXT: orl $32, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: .LBB22_1: +; X32-NEXT: .LBB22_2: ; X32-NEXT: bsfl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl @@ -961,15 +959,15 @@ ; X32-CLZ: # %bb.0: ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: testl %eax, %eax -; X32-CLZ-NEXT: jne .LBB22_1 -; X32-CLZ-NEXT: # %bb.2: +; X32-CLZ-NEXT: jne .LBB22_2 +; X32-CLZ-NEXT: # %bb.1: ; X32-CLZ-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X32-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: tzcntl %eax, %eax ; X32-CLZ-NEXT: orl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl -; X32-CLZ-NEXT: .LBB22_1: +; X32-CLZ-NEXT: .LBB22_2: ; X32-CLZ-NEXT: tzcntl %eax, %eax ; X32-CLZ-NEXT: xorl %edx, %edx ; X32-CLZ-NEXT: retl Index: test/CodeGen/X86/cmov-promotion.ll =================================================================== --- test/CodeGen/X86/cmov-promotion.ll +++ test/CodeGen/X86/cmov-promotion.ll @@ -6,10 +6,10 @@ ; CMOV-LABEL: cmov_zpromotion_8_to_16: ; CMOV: # %bb.0: ; CMOV-NEXT: testb $1, %dil -; CMOV-NEXT: movb $117, %al -; CMOV-NEXT: jne .LBB0_2 -; CMOV-NEXT: # %bb.1: ; CMOV-NEXT: movb $-19, %al +; CMOV-NEXT: je .LBB0_2 +; CMOV-NEXT: # %bb.1: +; CMOV-NEXT: movb $117, %al ; CMOV-NEXT: .LBB0_2: ; CMOV-NEXT: movzbl %al, %eax ; CMOV-NEXT: # kill: def $ax killed $ax killed $eax @@ -18,10 +18,10 @@ ; NO_CMOV-LABEL: cmov_zpromotion_8_to_16: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $117, %al -; NO_CMOV-NEXT: jne .LBB0_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movb $-19, %al +; NO_CMOV-NEXT: je .LBB0_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movb $117, %al ; NO_CMOV-NEXT: .LBB0_2: ; NO_CMOV-NEXT: movzbl %al, %eax ; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax @@ -35,10 +35,10 @@ ; CMOV-LABEL: cmov_zpromotion_8_to_32: ; CMOV: # %bb.0: ; CMOV-NEXT: testb $1, %dil -; CMOV-NEXT: movb $126, %al -; CMOV-NEXT: jne .LBB1_2 -; CMOV-NEXT: # %bb.1: ; CMOV-NEXT: movb $-1, %al +; CMOV-NEXT: je .LBB1_2 +; CMOV-NEXT: # %bb.1: +; CMOV-NEXT: movb $126, %al ; CMOV-NEXT: .LBB1_2: ; CMOV-NEXT: movzbl %al, %eax ; CMOV-NEXT: retq @@ -46,10 +46,10 @@ ; NO_CMOV-LABEL: cmov_zpromotion_8_to_32: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al -; NO_CMOV-NEXT: jne .LBB1_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: je .LBB1_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movb $126, %al ; NO_CMOV-NEXT: .LBB1_2: ; NO_CMOV-NEXT: movzbl %al, %eax ; NO_CMOV-NEXT: retl @@ -62,10 +62,10 @@ ; CMOV-LABEL: cmov_zpromotion_8_to_64: ; CMOV: # %bb.0: ; CMOV-NEXT: testb $1, %dil -; CMOV-NEXT: movb $126, %al -; CMOV-NEXT: jne .LBB2_2 -; CMOV-NEXT: # %bb.1: ; CMOV-NEXT: movb $-1, %al +; CMOV-NEXT: je .LBB2_2 +; CMOV-NEXT: # %bb.1: +; CMOV-NEXT: movb $126, %al ; CMOV-NEXT: .LBB2_2: ; CMOV-NEXT: movzbl %al, %eax ; CMOV-NEXT: retq @@ -73,10 +73,10 @@ ; NO_CMOV-LABEL: cmov_zpromotion_8_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al -; NO_CMOV-NEXT: jne .LBB2_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: je .LBB2_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movb $126, %al ; NO_CMOV-NEXT: .LBB2_2: ; NO_CMOV-NEXT: movzbl %al, %eax ; NO_CMOV-NEXT: xorl %edx, %edx @@ -98,10 +98,10 @@ ; NO_CMOV-LABEL: cmov_zpromotion_16_to_32: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E -; NO_CMOV-NEXT: jne .LBB3_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movl $65535, %eax # imm = 0xFFFF +; NO_CMOV-NEXT: je .LBB3_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E ; NO_CMOV-NEXT: .LBB3_2: ; NO_CMOV-NEXT: retl %t0 = select i1 %c, i16 12414, i16 -1 @@ -121,10 +121,10 @@ ; NO_CMOV-LABEL: cmov_zpromotion_16_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E -; NO_CMOV-NEXT: jne .LBB4_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movl $65535, %eax # imm = 0xFFFF +; NO_CMOV-NEXT: je .LBB4_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E ; NO_CMOV-NEXT: .LBB4_2: ; NO_CMOV-NEXT: xorl %edx, %edx ; NO_CMOV-NEXT: retl @@ -145,10 +145,10 @@ ; NO_CMOV-LABEL: cmov_zpromotion_32_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E -; NO_CMOV-NEXT: jne .LBB5_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movl $-1, %eax +; NO_CMOV-NEXT: je .LBB5_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E ; NO_CMOV-NEXT: .LBB5_2: ; NO_CMOV-NEXT: xorl %edx, %edx ; NO_CMOV-NEXT: retl @@ -161,10 +161,10 @@ ; CMOV-LABEL: cmov_spromotion_8_to_16: ; CMOV: # %bb.0: ; CMOV-NEXT: testb $1, %dil -; CMOV-NEXT: movb $117, %al -; CMOV-NEXT: jne .LBB6_2 -; CMOV-NEXT: # %bb.1: ; CMOV-NEXT: movb $-19, %al +; CMOV-NEXT: je .LBB6_2 +; CMOV-NEXT: # %bb.1: +; CMOV-NEXT: movb $117, %al ; CMOV-NEXT: .LBB6_2: ; CMOV-NEXT: movsbl %al, %eax ; CMOV-NEXT: # kill: def $ax killed $ax killed $eax @@ -173,10 +173,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_8_to_16: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $117, %al -; NO_CMOV-NEXT: jne .LBB6_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movb $-19, %al +; NO_CMOV-NEXT: je .LBB6_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movb $117, %al ; NO_CMOV-NEXT: .LBB6_2: ; NO_CMOV-NEXT: movsbl %al, %eax ; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax @@ -190,10 +190,10 @@ ; CMOV-LABEL: cmov_spromotion_8_to_32: ; CMOV: # %bb.0: ; CMOV-NEXT: testb $1, %dil -; CMOV-NEXT: movb $126, %al -; CMOV-NEXT: jne .LBB7_2 -; CMOV-NEXT: # %bb.1: ; CMOV-NEXT: movb $-1, %al +; CMOV-NEXT: je .LBB7_2 +; CMOV-NEXT: # %bb.1: +; CMOV-NEXT: movb $126, %al ; CMOV-NEXT: .LBB7_2: ; CMOV-NEXT: movsbl %al, %eax ; CMOV-NEXT: retq @@ -201,10 +201,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_8_to_32: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al -; NO_CMOV-NEXT: jne .LBB7_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: je .LBB7_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movb $126, %al ; NO_CMOV-NEXT: .LBB7_2: ; NO_CMOV-NEXT: movsbl %al, %eax ; NO_CMOV-NEXT: retl @@ -217,10 +217,10 @@ ; CMOV-LABEL: cmov_spromotion_8_to_64: ; CMOV: # %bb.0: ; CMOV-NEXT: testb $1, %dil -; CMOV-NEXT: movb $126, %al -; CMOV-NEXT: jne .LBB8_2 -; CMOV-NEXT: # %bb.1: ; CMOV-NEXT: movb $-1, %al +; CMOV-NEXT: je .LBB8_2 +; CMOV-NEXT: # %bb.1: +; CMOV-NEXT: movb $126, %al ; CMOV-NEXT: .LBB8_2: ; CMOV-NEXT: movsbq %al, %rax ; CMOV-NEXT: retq @@ -228,10 +228,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_8_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al -; NO_CMOV-NEXT: jne .LBB8_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: je .LBB8_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movb $126, %al ; NO_CMOV-NEXT: .LBB8_2: ; NO_CMOV-NEXT: movsbl %al, %eax ; NO_CMOV-NEXT: movl %eax, %edx @@ -254,10 +254,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_16_to_32: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E -; NO_CMOV-NEXT: jne .LBB9_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movl $-1, %eax +; NO_CMOV-NEXT: je .LBB9_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E ; NO_CMOV-NEXT: .LBB9_2: ; NO_CMOV-NEXT: retl %t0 = select i1 %c, i16 12414, i16 -1 @@ -277,10 +277,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_16_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E -; NO_CMOV-NEXT: jne .LBB10_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movl $-1, %eax +; NO_CMOV-NEXT: je .LBB10_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E ; NO_CMOV-NEXT: .LBB10_2: ; NO_CMOV-NEXT: movl %eax, %edx ; NO_CMOV-NEXT: sarl $31, %edx @@ -302,10 +302,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_32_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E -; NO_CMOV-NEXT: jne .LBB11_2 -; NO_CMOV-NEXT: # %bb.1: ; NO_CMOV-NEXT: movl $-1, %eax +; NO_CMOV-NEXT: je .LBB11_2 +; NO_CMOV-NEXT: # %bb.1: +; NO_CMOV-NEXT: movl $12414, %eax # imm = 0x307E ; NO_CMOV-NEXT: .LBB11_2: ; NO_CMOV-NEXT: movl %eax, %edx ; NO_CMOV-NEXT: sarl $31, %edx Index: test/CodeGen/X86/cmov.ll =================================================================== --- test/CodeGen/X86/cmov.ll +++ test/CodeGen/X86/cmov.ll @@ -93,21 +93,24 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: setne %bl -; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: je .LBB3_4 -; CHECK-NEXT: # %bb.3: # %func_4.exit.i +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: .LBB3_4: # %func_4.exit.i ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB3_7 -; CHECK-NEXT: # %bb.5: # %func_4.exit.i +; CHECK-NEXT: je .LBB3_8 +; CHECK-NEXT: .LBB3_6: # %func_4.exit.i ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB3_7 -; CHECK-NEXT: # %bb.6: # %bb.i.i +; CHECK-NEXT: jne .LBB3_8 +; CHECK-NEXT: # %bb.7: # %bb.i.i ; CHECK-NEXT: movb {{.*}}(%rip), %cl ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: .LBB3_7: # %func_1.exit +; CHECK-NEXT: jmp .LBB3_8 +; CHECK-NEXT: .LBB3_4: # %func_4.exit.i +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB3_6 +; CHECK-NEXT: .LBB3_8: # %func_1.exit ; CHECK-NEXT: movb %cl, {{.*}}(%rip) ; CHECK-NEXT: movzbl %cl, %esi ; CHECK-NEXT: movl $_2E_str, %edi @@ -194,12 +197,12 @@ ; CHECK-LABEL: test7: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB6_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB6_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB6_1: +; CHECK-NEXT: .LBB6_2: ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -244,29 +244,29 @@ ; Which was invalid as %12 is not the same value as %13 ; CHECK-LABEL: no_cascade_opt: -; CMOV-DAG: cmpl %edx, %esi -; CMOV-DAG: movb $20, %al -; CMOV-DAG: movb $20, %dl -; CMOV: jge [[BB2:.LBB[0-9_]+]] -; CMOV: jle [[BB3:.LBB[0-9_]+]] -; CMOV: [[BB0:.LBB[0-9_]+]] -; CMOV: testl %edi, %edi -; CMOV: jne [[BB4:.LBB[0-9_]+]] -; CMOV: [[BB1:.LBB[0-9_]+]] -; CMOV: movb %al, g8(%rip) -; CMOV: retq -; CMOV: [[BB2]]: -; CMOV: movl %ecx, %edx -; CMOV: jg [[BB0]] -; CMOV: [[BB3]]: -; CMOV: movl %edx, %eax -; CMOV: testl %edi, %edi -; CMOV: je [[BB1]] -; CMOV: [[BB4]]: -; CMOV: movl %edx, %eax -; CMOV: movb %al, g8(%rip) -; CMOV: retq -define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) { +; CMOV-NEXT: cmpl %edx, %esi +; CMOV-NEXT: jl [[BB2:.LBB[0-9_]+]] +; CMOV-NEXT: movl %ecx, %eax +; CMOV-NEXT: jg [[BB3:.LBB[0-9_]+]] +; CMOV-NEXT: [[BB0:.LBB[0-9_]+]] +; CMOV-NEXT: testl %edi, %edi +; CMOV-NEXT: je [[BB4:.LBB[0-9_]+]] +; CMOV-NEXT: [[BB1:.LBB[0-9_]+]] +; CMOV-NEXT: movb %cl, g8(%rip) +; CMOV-NEXT: retq +; CMOV-NEXT: [[BB2]]: +; CMOV-NEXT: movb $20, %cl +; CMOV-NEXT: movl %ecx, %eax +; CMOV-NEXT: jle [[BB0]] +; CMOV-NEXT: [[BB3]]: +; CMOV-NEXT: movb $20, %al +; CMOV-NEXT: testl %edi, %edi +; CMOV-NEXT: jne [[BB1]] +; CMOV-NEXT: [[BB4]]: +; CMOV-NEXT: movl %eax, %ecx +; CMOV-NEXT: movb %cl, g8(%rip) +; CMOV-NEXT: retq +define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) nounwind { entry: %c0 = icmp eq i32 %v0, 0 %c1 = icmp slt i32 %v1, %v2 Index: test/CodeGen/X86/cmpxchg-clobber-flags.ll =================================================================== --- test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -220,11 +220,10 @@ ; 32-GOOD-RA-NEXT: sete %bl ; 32-GOOD-RA-NEXT: calll foo ; 32-GOOD-RA-NEXT: testb %bl, %bl -; 32-GOOD-RA-NEXT: jne .LBB2_2 +; 32-GOOD-RA-NEXT: je .LBB2_2 ; 32-GOOD-RA-NEXT: # %bb.1: # %entry -; 32-GOOD-RA-NEXT: movl %eax, %esi -; 32-GOOD-RA-NEXT: .LBB2_2: # %entry ; 32-GOOD-RA-NEXT: movl %esi, %eax +; 32-GOOD-RA-NEXT: .LBB2_2: # %entry ; 32-GOOD-RA-NEXT: addl $4, %esp ; 32-GOOD-RA-NEXT: popl %esi ; 32-GOOD-RA-NEXT: popl %ebx @@ -242,11 +241,10 @@ ; 32-FAST-RA-NEXT: sete %bl ; 32-FAST-RA-NEXT: calll foo ; 32-FAST-RA-NEXT: testb %bl, %bl -; 32-FAST-RA-NEXT: jne .LBB2_2 +; 32-FAST-RA-NEXT: je .LBB2_2 ; 32-FAST-RA-NEXT: # %bb.1: # %entry -; 32-FAST-RA-NEXT: movl %eax, %esi -; 32-FAST-RA-NEXT: .LBB2_2: # %entry ; 32-FAST-RA-NEXT: movl %esi, %eax +; 32-FAST-RA-NEXT: .LBB2_2: # %entry ; 32-FAST-RA-NEXT: addl $4, %esp ; 32-FAST-RA-NEXT: popl %esi ; 32-FAST-RA-NEXT: popl %ebx Index: test/CodeGen/X86/copy-eflags.ll =================================================================== --- test/CodeGen/X86/copy-eflags.ll +++ test/CodeGen/X86/copy-eflags.ll @@ -210,8 +210,8 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movb {{[0-9]+}}(%esp), %ch ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: jmp .LBB3_1 ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB3_5: # %bb1 @@ -219,29 +219,31 @@ ; X32-NEXT: movl %esi, %eax ; X32-NEXT: cltd ; X32-NEXT: idivl %edi +; X32-NEXT: movb %ch, %al ; X32-NEXT: .LBB3_1: # %bb1 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movsbl %cl, %eax -; X32-NEXT: movl %eax, %edx -; X32-NEXT: sarl $31, %edx -; X32-NEXT: cmpl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: sbbl %edx, %eax -; X32-NEXT: setl %al +; X32-NEXT: movsbl %al, %edx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: sarl $31, %edi +; X32-NEXT: cmpl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: sbbl %edi, %edx +; X32-NEXT: setl %ah ; X32-NEXT: setl %dl -; X32-NEXT: movzbl %dl, %edi -; X32-NEXT: negl %edi -; X32-NEXT: testb %al, %al -; X32-NEXT: jne .LBB3_3 +; X32-NEXT: movzbl %dl, %edx +; X32-NEXT: negl %edx +; X32-NEXT: testb %ah, %ah +; X32-NEXT: movb %cl, %ch +; X32-NEXT: je .LBB3_3 ; X32-NEXT: # %bb.2: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: movb %ch, %cl +; X32-NEXT: movb %al, %ch ; X32-NEXT: .LBB3_3: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: movb %cl, (%ebp) -; X32-NEXT: movl (%ebx), %edx -; X32-NEXT: testb %al, %al -; X32-NEXT: jne .LBB3_5 +; X32-NEXT: movb %ch, (%ebp) +; X32-NEXT: movl (%ebx), %edi +; X32-NEXT: testb %ah, %ah +; X32-NEXT: je .LBB3_5 ; X32-NEXT: # %bb.4: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X32-NEXT: movl %edx, %edi @@ -249,6 +251,7 @@ ; ; X64-LABEL: PR37100: ; X64: # %bb.0: # %bb +; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; X64-NEXT: jmp .LBB3_1 @@ -258,6 +261,7 @@ ; X64-NEXT: movl %r10d, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %esi +; X64-NEXT: movl %ebx, %edi ; X64-NEXT: .LBB3_1: # %bb1 ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movsbq %dil, %rax @@ -266,13 +270,14 @@ ; X64-NEXT: setl %sil ; X64-NEXT: negl %esi ; X64-NEXT: cmpq %rax, %r11 -; X64-NEXT: jl .LBB3_3 +; X64-NEXT: movl %ecx, %ebx +; X64-NEXT: jge .LBB3_3 ; X64-NEXT: # %bb.2: # %bb1 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X64-NEXT: movl %ecx, %edi +; X64-NEXT: movl %edi, %ebx ; X64-NEXT: .LBB3_3: # %bb1 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X64-NEXT: movb %dil, (%r8) +; X64-NEXT: movb %bl, (%r8) ; X64-NEXT: jl .LBB3_5 ; X64-NEXT: # %bb.4: # %bb1 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 Index: test/CodeGen/X86/dagcombine-select.ll =================================================================== --- test/CodeGen/X86/dagcombine-select.ll +++ test/CodeGen/X86/dagcombine-select.ll @@ -45,12 +45,10 @@ ; CHECK-LABEL: select_and_v4: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpl $11, %edi -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: jl .LBB3_2 +; CHECK-NEXT: jge .LBB3_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %c = icmp slt i32 %x, 11 %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> @@ -101,10 +99,12 @@ ; CHECK-LABEL: select_or_v4: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpl $11, %edi -; CHECK-NEXT: jl .LBB7_2 +; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-NEXT: jge .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: movdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %c = icmp slt i32 %x, 11 %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> @@ -240,11 +240,11 @@ ; CHECK-LABEL: fsub_constant_sel_constants: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB17_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB17_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB17_1: +; CHECK-NEXT: .LBB17_2: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq %sel = select i1 %cond, double -4.0, double 23.3 @@ -256,11 +256,11 @@ ; CHECK-LABEL: fdiv_constant_sel_constants: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB18_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB18_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB18_1: +; CHECK-NEXT: .LBB18_2: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq %sel = select i1 %cond, double -4.0, double 23.3 @@ -272,11 +272,11 @@ ; CHECK-LABEL: frem_constant_sel_constants: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB19_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB19_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB19_1: +; CHECK-NEXT: .LBB19_2: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq %sel = select i1 %cond, double -4.0, double 23.3 Index: test/CodeGen/X86/extract-bits.ll =================================================================== --- test/CodeGen/X86/extract-bits.ll +++ test/CodeGen/X86/extract-bits.ll @@ -556,6 +556,7 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_a0: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -565,33 +566,38 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB7_2 ; X86-NOBMI-NEXT: # %bb.1: ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB7_2: -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %ebx ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB7_4 +; X86-NOBMI-NEXT: jne .LBB7_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB7_5 ; X86-NOBMI-NEXT: .LBB7_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB7_5: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_a0: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -601,33 +607,38 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB7_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edi, %esi ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB7_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %ebx ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB7_4 +; X86-BMI1NOTBM-NEXT: jne .LBB7_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB7_5 ; X86-BMI1NOTBM-NEXT: .LBB7_4: +; X86-BMI1NOTBM-NEXT: movl %ebx, %edx +; X86-BMI1NOTBM-NEXT: .LBB7_5: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl %esi, %eax ; X86-BMI1NOTBM-NEXT: andl %edi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi +; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_a0: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -637,23 +648,26 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB7_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB7_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %ebp ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %edx +; X86-BMI1BMI2-NEXT: shlxl %ebx, %ebp, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB7_4 +; X86-BMI1BMI2-NEXT: jne .LBB7_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %ecx, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB7_5 ; X86-BMI1BMI2-NEXT: .LBB7_4: +; X86-BMI1BMI2-NEXT: movl %ecx, %edx +; X86-BMI1BMI2-NEXT: .LBB7_5: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl %esi, %eax @@ -661,6 +675,7 @@ ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a0: @@ -699,6 +714,7 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_a0_arithmetic: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -715,27 +731,32 @@ ; X86-NOBMI-NEXT: movl %esi, %edi ; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB8_2: -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %ebx +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB8_4 +; X86-NOBMI-NEXT: jne .LBB8_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB8_5 ; X86-NOBMI-NEXT: .LBB8_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB8_5: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl %edi, %eax ; X86-NOBMI-NEXT: andl %esi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_a0_arithmetic: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -752,27 +773,32 @@ ; X86-BMI1NOTBM-NEXT: movl %esi, %edi ; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB8_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %ebx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB8_4 +; X86-BMI1NOTBM-NEXT: jne .LBB8_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB8_5 ; X86-BMI1NOTBM-NEXT: .LBB8_4: +; X86-BMI1NOTBM-NEXT: movl %ebx, %edx +; X86-BMI1NOTBM-NEXT: .LBB8_5: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl %edi, %eax ; X86-BMI1NOTBM-NEXT: andl %esi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi +; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_a0_arithmetic: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -789,17 +815,20 @@ ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: movl %eax, %edi ; X86-BMI1BMI2-NEXT: .LBB8_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %ebp +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %edx +; X86-BMI1BMI2-NEXT: shlxl %ebx, %ebp, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB8_4 +; X86-BMI1BMI2-NEXT: jne .LBB8_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %ecx, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB8_5 ; X86-BMI1BMI2-NEXT: .LBB8_4: +; X86-BMI1BMI2-NEXT: movl %ecx, %edx +; X86-BMI1BMI2-NEXT: .LBB8_5: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl %esi, %eax @@ -807,6 +836,7 @@ ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a0_arithmetic: @@ -845,6 +875,7 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_a1_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -854,33 +885,38 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB9_2 ; X86-NOBMI-NEXT: # %bb.1: ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB9_2: -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %ebx ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB9_4 +; X86-NOBMI-NEXT: jne .LBB9_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB9_5 ; X86-NOBMI-NEXT: .LBB9_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB9_5: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_a1_indexzext: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -890,33 +926,38 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB9_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edi, %esi ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB9_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %ebx ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB9_4 +; X86-BMI1NOTBM-NEXT: jne .LBB9_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB9_5 ; X86-BMI1NOTBM-NEXT: .LBB9_4: +; X86-BMI1NOTBM-NEXT: movl %ebx, %edx +; X86-BMI1NOTBM-NEXT: .LBB9_5: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl %esi, %eax ; X86-BMI1NOTBM-NEXT: andl %edi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi +; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_a1_indexzext: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -926,23 +967,26 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB9_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB9_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %ebp ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %edx +; X86-BMI1BMI2-NEXT: shlxl %ebx, %ebp, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB9_4 +; X86-BMI1BMI2-NEXT: jne .LBB9_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %ecx, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB9_5 ; X86-BMI1BMI2-NEXT: .LBB9_4: +; X86-BMI1BMI2-NEXT: movl %ecx, %edx +; X86-BMI1BMI2-NEXT: .LBB9_5: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl %esi, %eax @@ -950,6 +994,7 @@ ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a1_indexzext: @@ -993,6 +1038,7 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_a2_load: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -1003,33 +1049,38 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB10_2 ; X86-NOBMI-NEXT: # %bb.1: ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB10_2: -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %ebx ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB10_4 +; X86-NOBMI-NEXT: jne .LBB10_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB10_5 ; X86-NOBMI-NEXT: .LBB10_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB10_5: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_a2_load: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -1040,33 +1091,38 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB10_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edi, %esi ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB10_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %ebx ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB10_4 +; X86-BMI1NOTBM-NEXT: jne .LBB10_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB10_5 ; X86-BMI1NOTBM-NEXT: .LBB10_4: +; X86-BMI1NOTBM-NEXT: movl %ebx, %edx +; X86-BMI1NOTBM-NEXT: .LBB10_5: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl %esi, %eax ; X86-BMI1NOTBM-NEXT: andl %edi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi +; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_a2_load: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -1077,23 +1133,26 @@ ; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB10_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB10_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %ebp ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %edx +; X86-BMI1BMI2-NEXT: shlxl %ebx, %ebp, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB10_4 +; X86-BMI1BMI2-NEXT: jne .LBB10_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %ecx, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB10_5 ; X86-BMI1BMI2-NEXT: .LBB10_4: +; X86-BMI1BMI2-NEXT: movl %ecx, %edx +; X86-BMI1BMI2-NEXT: .LBB10_5: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl %esi, %eax @@ -1101,6 +1160,7 @@ ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a2_load: @@ -1142,6 +1202,7 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_a3_load_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -1152,33 +1213,38 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB11_2 ; X86-NOBMI-NEXT: # %bb.1: ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB11_2: -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %ebx ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB11_4 +; X86-NOBMI-NEXT: jne .LBB11_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB11_5 ; X86-NOBMI-NEXT: .LBB11_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB11_5: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_a3_load_indexzext: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -1189,33 +1255,38 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB11_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edi, %esi ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB11_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %ebx ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB11_4 +; X86-BMI1NOTBM-NEXT: jne .LBB11_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB11_5 ; X86-BMI1NOTBM-NEXT: .LBB11_4: +; X86-BMI1NOTBM-NEXT: movl %ebx, %edx +; X86-BMI1NOTBM-NEXT: .LBB11_5: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl %esi, %eax ; X86-BMI1NOTBM-NEXT: andl %edi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi +; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_a3_load_indexzext: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -1226,23 +1297,26 @@ ; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB11_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB11_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %ebp ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %edx +; X86-BMI1BMI2-NEXT: shlxl %ebx, %ebp, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB11_4 +; X86-BMI1BMI2-NEXT: jne .LBB11_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %ecx, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB11_5 ; X86-BMI1BMI2-NEXT: .LBB11_4: +; X86-BMI1BMI2-NEXT: movl %ecx, %edx +; X86-BMI1BMI2-NEXT: .LBB11_5: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl %esi, %eax @@ -1250,6 +1324,7 @@ ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a3_load_indexzext: @@ -1296,6 +1371,7 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_a4_commutative: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -1305,33 +1381,38 @@ ; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx ; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB12_2 ; X86-NOBMI-NEXT: # %bb.1: ; X86-NOBMI-NEXT: movl %edx, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB12_2: -; X86-NOBMI-NEXT: movl $1, %esi +; X86-NOBMI-NEXT: movl $1, %ebx ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shldl %cl, %esi, %edi -; X86-NOBMI-NEXT: shll %cl, %esi +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edi +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB12_4 +; X86-NOBMI-NEXT: jne .LBB12_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl %ebx, %esi +; X86-NOBMI-NEXT: jmp .LBB12_5 ; X86-NOBMI-NEXT: .LBB12_4: +; X86-NOBMI-NEXT: movl %ebx, %edi +; X86-NOBMI-NEXT: .LBB12_5: ; X86-NOBMI-NEXT: addl $-1, %esi ; X86-NOBMI-NEXT: adcl $-1, %edi ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_a4_commutative: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -1341,33 +1422,38 @@ ; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx ; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB12_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edx, %eax ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB12_2: -; X86-BMI1NOTBM-NEXT: movl $1, %esi +; X86-BMI1NOTBM-NEXT: movl $1, %ebx ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB12_4 +; X86-BMI1NOTBM-NEXT: jne .LBB12_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %ebx, %esi +; X86-BMI1NOTBM-NEXT: jmp .LBB12_5 ; X86-BMI1NOTBM-NEXT: .LBB12_4: +; X86-BMI1NOTBM-NEXT: movl %ebx, %edi +; X86-BMI1NOTBM-NEXT: .LBB12_5: ; X86-BMI1NOTBM-NEXT: addl $-1, %esi ; X86-BMI1NOTBM-NEXT: adcl $-1, %edi ; X86-BMI1NOTBM-NEXT: andl %esi, %eax ; X86-BMI1NOTBM-NEXT: andl %edi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi +; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_a4_commutative: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -1377,30 +1463,34 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB12_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edx, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB12_2: -; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: movl $1, %ebp +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %edi, %ecx +; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %edi +; X86-BMI1BMI2-NEXT: shlxl %ebx, %ebp, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB12_4 +; X86-BMI1BMI2-NEXT: jne .LBB12_4 ; X86-BMI1BMI2-NEXT: # %bb.3: ; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI1BMI2-NEXT: jmp .LBB12_5 ; X86-BMI1BMI2-NEXT: .LBB12_4: -; X86-BMI1BMI2-NEXT: addl $-1, %ecx -; X86-BMI1BMI2-NEXT: adcl $-1, %esi -; X86-BMI1BMI2-NEXT: andl %ecx, %eax -; X86-BMI1BMI2-NEXT: andl %esi, %edx +; X86-BMI1BMI2-NEXT: movl %ecx, %edi +; X86-BMI1BMI2-NEXT: .LBB12_5: +; X86-BMI1BMI2-NEXT: addl $-1, %esi +; X86-BMI1BMI2-NEXT: adcl $-1, %edi +; X86-BMI1BMI2-NEXT: andl %esi, %eax +; X86-BMI1BMI2-NEXT: andl %edi, %edx ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a4_commutative: @@ -1445,17 +1535,17 @@ ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl %esi, %ebp -; X86-NOBMI-NEXT: movl %eax, %ecx ; X86-NOBMI-NEXT: shrl %cl, %ebp -; X86-NOBMI-NEXT: shrdl %cl, %esi, %ebx -; X86-NOBMI-NEXT: testb $32, %al +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax +; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB13_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %ebp, %ebx +; X86-NOBMI-NEXT: movl %ebp, %eax ; X86-NOBMI-NEXT: xorl %ebp, %ebp ; X86-NOBMI-NEXT: .LBB13_2: ; X86-NOBMI-NEXT: movl $1, %esi @@ -1464,21 +1554,23 @@ ; X86-NOBMI-NEXT: shldl %cl, %esi, %edi ; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: testb $32, %dl -; X86-NOBMI-NEXT: je .LBB13_4 +; X86-NOBMI-NEXT: jne .LBB13_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl %esi, %ebx +; X86-NOBMI-NEXT: jmp .LBB13_5 ; X86-NOBMI-NEXT: .LBB13_4: -; X86-NOBMI-NEXT: addl $-1, %esi +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: .LBB13_5: +; X86-NOBMI-NEXT: addl $-1, %ebx ; X86-NOBMI-NEXT: adcl $-1, %edi -; X86-NOBMI-NEXT: andl %ebx, %esi +; X86-NOBMI-NEXT: andl %eax, %ebx ; X86-NOBMI-NEXT: andl %ebp, %edi ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: pushl %eax +; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax ; X86-NOBMI-NEXT: movl %edi, %edx ; X86-NOBMI-NEXT: addl $12, %esp ; X86-NOBMI-NEXT: popl %esi @@ -1495,17 +1587,17 @@ ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: subl $12, %esp ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI1NOTBM-NEXT: movl %esi, %ebp -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB13_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx +; X86-BMI1NOTBM-NEXT: movl %ebp, %eax ; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp ; X86-BMI1NOTBM-NEXT: .LBB13_2: ; X86-BMI1NOTBM-NEXT: movl $1, %esi @@ -1514,21 +1606,23 @@ ; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi ; X86-BMI1NOTBM-NEXT: shll %cl, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: je .LBB13_4 +; X86-BMI1NOTBM-NEXT: jne .LBB13_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %esi, %ebx +; X86-BMI1NOTBM-NEXT: jmp .LBB13_5 ; X86-BMI1NOTBM-NEXT: .LBB13_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %esi +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: .LBB13_5: +; X86-BMI1NOTBM-NEXT: addl $-1, %ebx ; X86-BMI1NOTBM-NEXT: adcl $-1, %edi -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi +; X86-BMI1NOTBM-NEXT: andl %eax, %ebx ; X86-BMI1NOTBM-NEXT: andl %ebp, %edi ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: pushl %eax +; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: movl %ebx, %eax ; X86-BMI1NOTBM-NEXT: movl %edi, %edx ; X86-BMI1NOTBM-NEXT: addl $12, %esp ; X86-BMI1NOTBM-NEXT: popl %esi @@ -1547,38 +1641,40 @@ ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl %eax, %ecx +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %ebx -; X86-BMI1BMI2-NEXT: shrxl %eax, %esi, %ebp -; X86-BMI1BMI2-NEXT: testb $32, %al +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %ebp +; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB13_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %ebp, %ebx ; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp ; X86-BMI1BMI2-NEXT: .LBB13_2: -; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi -; X86-BMI1BMI2-NEXT: shlxl %edx, %edi, %edi +; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI1BMI2-NEXT: shlxl %edx, %eax, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB13_4 +; X86-BMI1BMI2-NEXT: jne .LBB13_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi +; X86-BMI1BMI2-NEXT: movl %ecx, %esi +; X86-BMI1BMI2-NEXT: jmp .LBB13_5 ; X86-BMI1BMI2-NEXT: .LBB13_4: -; X86-BMI1BMI2-NEXT: addl $-1, %edi -; X86-BMI1BMI2-NEXT: adcl $-1, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: movl %ecx, %edi +; X86-BMI1BMI2-NEXT: .LBB13_5: +; X86-BMI1BMI2-NEXT: addl $-1, %esi +; X86-BMI1BMI2-NEXT: adcl $-1, %edi +; X86-BMI1BMI2-NEXT: andl %ebx, %esi +; X86-BMI1BMI2-NEXT: andl %ebp, %edi ; X86-BMI1BMI2-NEXT: subl $8, %esp ; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: pushl %eax +; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx +; X86-BMI1BMI2-NEXT: movl %esi, %eax +; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi @@ -2096,6 +2192,7 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b0: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2105,6 +2202,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB20_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2112,26 +2210,30 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB20_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB20_4 +; X86-NOBMI-NEXT: jne .LBB20_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB20_5 ; X86-NOBMI-NEXT: .LBB20_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB20_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b0: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi @@ -2142,32 +2244,37 @@ ; X86-BMI1NOTBM-NEXT: movl %edi, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx ; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB20_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB20_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %ebp +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB20_4 +; X86-BMI1NOTBM-NEXT: jne .LBB20_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %ebp, %edi +; X86-BMI1NOTBM-NEXT: jmp .LBB20_5 ; X86-BMI1NOTBM-NEXT: .LBB20_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx +; X86-BMI1NOTBM-NEXT: .LBB20_5: +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx +; X86-BMI1NOTBM-NEXT: popl %ebp ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_b0: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -2177,27 +2284,31 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB20_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edx, %esi ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB20_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebx +; X86-BMI1BMI2-NEXT: shlxl %eax, %ebx, %ebp ; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi +; X86-BMI1BMI2-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB20_4 +; X86-BMI1BMI2-NEXT: jne .LBB20_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ebp, %edi +; X86-BMI1BMI2-NEXT: jmp .LBB20_5 ; X86-BMI1BMI2-NEXT: .LBB20_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: movl %ebp, %ebx +; X86-BMI1BMI2-NEXT: .LBB20_5: +; X86-BMI1BMI2-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1BMI2-NEXT: andnl %esi, %edi, %eax ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b0: @@ -2236,6 +2347,7 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b1_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2245,6 +2357,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB21_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2252,26 +2365,30 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB21_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB21_4 +; X86-NOBMI-NEXT: jne .LBB21_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB21_5 ; X86-NOBMI-NEXT: .LBB21_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB21_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b1_indexzext: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi @@ -2282,32 +2399,37 @@ ; X86-BMI1NOTBM-NEXT: movl %edi, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx ; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB21_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB21_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %ebp +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB21_4 +; X86-BMI1NOTBM-NEXT: jne .LBB21_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %ebp, %edi +; X86-BMI1NOTBM-NEXT: jmp .LBB21_5 ; X86-BMI1NOTBM-NEXT: .LBB21_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx +; X86-BMI1NOTBM-NEXT: .LBB21_5: +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx +; X86-BMI1NOTBM-NEXT: popl %ebp ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_b1_indexzext: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -2317,27 +2439,31 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB21_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edx, %esi ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB21_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebx +; X86-BMI1BMI2-NEXT: shlxl %eax, %ebx, %ebp ; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi +; X86-BMI1BMI2-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB21_4 +; X86-BMI1BMI2-NEXT: jne .LBB21_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ebp, %edi +; X86-BMI1BMI2-NEXT: jmp .LBB21_5 ; X86-BMI1BMI2-NEXT: .LBB21_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: movl %ebp, %ebx +; X86-BMI1BMI2-NEXT: .LBB21_5: +; X86-BMI1BMI2-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1BMI2-NEXT: andnl %esi, %edi, %eax ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b1_indexzext: @@ -2381,6 +2507,7 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b2_load: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2391,6 +2518,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB22_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2398,26 +2526,30 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB22_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB22_4 +; X86-NOBMI-NEXT: jne .LBB22_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB22_5 ; X86-NOBMI-NEXT: .LBB22_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB22_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b2_load: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi @@ -2429,32 +2561,37 @@ ; X86-BMI1NOTBM-NEXT: movl %edi, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx ; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB22_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB22_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %ebp +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB22_4 +; X86-BMI1NOTBM-NEXT: jne .LBB22_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %ebp, %edi +; X86-BMI1NOTBM-NEXT: jmp .LBB22_5 ; X86-BMI1NOTBM-NEXT: .LBB22_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx +; X86-BMI1NOTBM-NEXT: .LBB22_5: +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx +; X86-BMI1NOTBM-NEXT: popl %ebp ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_b2_load: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -2465,27 +2602,31 @@ ; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edi, %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB22_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edx, %esi ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB22_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebx +; X86-BMI1BMI2-NEXT: shlxl %eax, %ebx, %ebp ; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi +; X86-BMI1BMI2-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB22_4 +; X86-BMI1BMI2-NEXT: jne .LBB22_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ebp, %edi +; X86-BMI1BMI2-NEXT: jmp .LBB22_5 ; X86-BMI1BMI2-NEXT: .LBB22_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: movl %ebp, %ebx +; X86-BMI1BMI2-NEXT: .LBB22_5: +; X86-BMI1BMI2-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1BMI2-NEXT: andnl %esi, %edi, %eax ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b2_load: @@ -2527,6 +2668,7 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b3_load_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2537,6 +2679,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB23_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2544,26 +2687,30 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB23_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB23_4 +; X86-NOBMI-NEXT: jne .LBB23_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB23_5 ; X86-NOBMI-NEXT: .LBB23_4: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB23_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi @@ -2575,32 +2722,37 @@ ; X86-BMI1NOTBM-NEXT: movl %edi, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx ; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB23_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB23_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %ebp +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB23_4 +; X86-BMI1NOTBM-NEXT: jne .LBB23_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %ebp, %edi +; X86-BMI1NOTBM-NEXT: jmp .LBB23_5 ; X86-BMI1NOTBM-NEXT: .LBB23_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx +; X86-BMI1NOTBM-NEXT: .LBB23_5: +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx +; X86-BMI1NOTBM-NEXT: popl %ebp ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_b3_load_indexzext: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -2611,27 +2763,31 @@ ; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edi, %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB23_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edx, %esi ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB23_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebx +; X86-BMI1BMI2-NEXT: shlxl %eax, %ebx, %ebp ; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi +; X86-BMI1BMI2-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB23_4 +; X86-BMI1BMI2-NEXT: jne .LBB23_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ebp, %edi +; X86-BMI1BMI2-NEXT: jmp .LBB23_5 ; X86-BMI1BMI2-NEXT: .LBB23_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: movl %ebp, %ebx +; X86-BMI1BMI2-NEXT: .LBB23_5: +; X86-BMI1BMI2-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1BMI2-NEXT: andnl %esi, %edi, %eax ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b3_load_indexzext: @@ -2678,6 +2834,7 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b4_commutative: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2687,6 +2844,7 @@ ; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx ; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB24_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2694,26 +2852,30 @@ ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB24_2: ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %esi +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: shldl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB24_4 +; X86-NOBMI-NEXT: jne .LBB24_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl %ebx, %esi +; X86-NOBMI-NEXT: jmp .LBB24_5 ; X86-NOBMI-NEXT: .LBB24_4: +; X86-NOBMI-NEXT: movl %ebx, %edi +; X86-NOBMI-NEXT: .LBB24_5: ; X86-NOBMI-NEXT: notl %edi ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %esi ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b4_commutative: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi @@ -2724,32 +2886,37 @@ ; X86-BMI1NOTBM-NEXT: movl %edi, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx ; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB24_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB24_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %ebp +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB24_4 +; X86-BMI1NOTBM-NEXT: jne .LBB24_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %ebp, %edi +; X86-BMI1NOTBM-NEXT: jmp .LBB24_5 ; X86-BMI1NOTBM-NEXT: .LBB24_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx +; X86-BMI1NOTBM-NEXT: .LBB24_5: +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx +; X86-BMI1NOTBM-NEXT: popl %ebp ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_b4_commutative: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi @@ -2759,27 +2926,31 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB24_2 ; X86-BMI1BMI2-NEXT: # %bb.1: ; X86-BMI1BMI2-NEXT: movl %edx, %esi ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB24_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebx +; X86-BMI1BMI2-NEXT: shlxl %eax, %ebx, %ebp ; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi +; X86-BMI1BMI2-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB24_4 +; X86-BMI1BMI2-NEXT: jne .LBB24_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ebp, %edi +; X86-BMI1BMI2-NEXT: jmp .LBB24_5 ; X86-BMI1BMI2-NEXT: .LBB24_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: movl %ebp, %ebx +; X86-BMI1BMI2-NEXT: .LBB24_5: +; X86-BMI1BMI2-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1BMI2-NEXT: andnl %esi, %edi, %eax ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx +; X86-BMI1BMI2-NEXT: popl %ebp ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b4_commutative: @@ -2824,41 +2995,43 @@ ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl %esi, %ebp -; X86-NOBMI-NEXT: movl %eax, %ecx ; X86-NOBMI-NEXT: shrl %cl, %ebp -; X86-NOBMI-NEXT: shrdl %cl, %esi, %ebx -; X86-NOBMI-NEXT: testb $32, %al +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax +; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB25_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %ebp, %ebx +; X86-NOBMI-NEXT: movl %ebp, %eax ; X86-NOBMI-NEXT: xorl %ebp, %ebp ; X86-NOBMI-NEXT: .LBB25_2: -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi +; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl %edx, %ecx -; X86-NOBMI-NEXT: shll %cl, %edi -; X86-NOBMI-NEXT: shldl %cl, %esi, %esi +; X86-NOBMI-NEXT: shll %cl, %esi +; X86-NOBMI-NEXT: shldl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %dl -; X86-NOBMI-NEXT: je .LBB25_4 +; X86-NOBMI-NEXT: jne .LBB25_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi +; X86-NOBMI-NEXT: movl %esi, %ebx +; X86-NOBMI-NEXT: jmp .LBB25_5 ; X86-NOBMI-NEXT: .LBB25_4: -; X86-NOBMI-NEXT: notl %esi -; X86-NOBMI-NEXT: andl %ebp, %esi +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: .LBB25_5: ; X86-NOBMI-NEXT: notl %edi -; X86-NOBMI-NEXT: andl %ebx, %edi +; X86-NOBMI-NEXT: andl %ebp, %edi +; X86-NOBMI-NEXT: notl %ebx +; X86-NOBMI-NEXT: andl %eax, %ebx ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NOBMI-NEXT: pushl %eax +; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: movl %edi, %edx ; X86-NOBMI-NEXT: addl $12, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi @@ -2873,33 +3046,36 @@ ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %ebx, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx +; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: movb %al, %cl ; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %edi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB25_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %esi, %edi ; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB25_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx +; X86-BMI1NOTBM-NEXT: movb %ch, %cl ; X86-BMI1NOTBM-NEXT: shll %cl, %ebp -; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: je .LBB25_4 +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx +; X86-BMI1NOTBM-NEXT: testb $32, %ch +; X86-BMI1NOTBM-NEXT: jne .LBB25_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: ; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp +; X86-BMI1NOTBM-NEXT: jmp .LBB25_5 ; X86-BMI1NOTBM-NEXT: .LBB25_4: -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %esi -; X86-BMI1NOTBM-NEXT: andnl %edi, %ebp, %edi +; X86-BMI1NOTBM-NEXT: movl %ebp, %edx +; X86-BMI1NOTBM-NEXT: .LBB25_5: +; X86-BMI1NOTBM-NEXT: andnl %esi, %edx, %esi +; X86-BMI1NOTBM-NEXT: andnl %edi, %ebx, %edi ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) ; X86-BMI1NOTBM-NEXT: pushl %eax @@ -2921,13 +3097,14 @@ ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %esi, %esi +; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1BMI2-NEXT: shrxl %eax, %edx, %esi +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB25_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -2935,17 +3112,19 @@ ; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: .LBB25_2: ; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shlxl %edx, %ebp, %ebx -; X86-BMI1BMI2-NEXT: movl %edx, %ecx +; X86-BMI1BMI2-NEXT: movl %ebx, %ecx +; X86-BMI1BMI2-NEXT: shlxl %ebx, %ebp, %ebx ; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %ebp -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB25_4 +; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: jne .LBB25_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ebx, %edx +; X86-BMI1BMI2-NEXT: jmp .LBB25_5 ; X86-BMI1BMI2-NEXT: .LBB25_4: +; X86-BMI1BMI2-NEXT: movl %ebx, %ebp +; X86-BMI1BMI2-NEXT: .LBB25_5: ; X86-BMI1BMI2-NEXT: andnl %esi, %ebp, %esi -; X86-BMI1BMI2-NEXT: andnl %edi, %ebx, %edi +; X86-BMI1BMI2-NEXT: andnl %edi, %edx, %edi ; X86-BMI1BMI2-NEXT: subl $8, %esp ; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) ; X86-BMI1BMI2-NEXT: pushl %eax @@ -3867,6 +4046,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB32_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -3876,15 +4056,17 @@ ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %ebp -; X86-NOBMI-NEXT: movl $-1, %ebx -; X86-NOBMI-NEXT: shrl %cl, %ebx +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB32_4 +; X86-NOBMI-NEXT: jne .LBB32_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: jmp .LBB32_5 ; X86-NOBMI-NEXT: .LBB32_4: +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: .LBB32_5: ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %ebp @@ -3914,6 +4096,7 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB32_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -3923,15 +4106,17 @@ ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shrl %cl, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB32_4 +; X86-BMI1NOTBM-NEXT: jne .LBB32_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: jmp .LBB32_5 ; X86-BMI1NOTBM-NEXT: .LBB32_4: +; X86-BMI1NOTBM-NEXT: movl %eax, %ebp +; X86-BMI1NOTBM-NEXT: .LBB32_5: ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %ebp @@ -3960,6 +4145,7 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB32_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -3968,22 +4154,24 @@ ; X86-BMI1BMI2-NEXT: .LBB32_2: ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %eax +; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB32_4 +; X86-BMI1BMI2-NEXT: jne .LBB32_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %eax, %ebx +; X86-BMI1BMI2-NEXT: jmp .LBB32_5 ; X86-BMI1BMI2-NEXT: .LBB32_4: +; X86-BMI1BMI2-NEXT: movl %eax, %ebp +; X86-BMI1BMI2-NEXT: .LBB32_5: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -4076,6 +4264,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB33_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -4085,15 +4274,17 @@ ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %ebp -; X86-NOBMI-NEXT: movl $-1, %ebx -; X86-NOBMI-NEXT: shrl %cl, %ebx +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB33_4 +; X86-NOBMI-NEXT: jne .LBB33_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: jmp .LBB33_5 ; X86-NOBMI-NEXT: .LBB33_4: +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: .LBB33_5: ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %ebp @@ -4123,6 +4314,7 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB33_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -4132,15 +4324,17 @@ ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shrl %cl, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB33_4 +; X86-BMI1NOTBM-NEXT: jne .LBB33_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: jmp .LBB33_5 ; X86-BMI1NOTBM-NEXT: .LBB33_4: +; X86-BMI1NOTBM-NEXT: movl %eax, %ebp +; X86-BMI1NOTBM-NEXT: .LBB33_5: ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %ebp @@ -4169,6 +4363,7 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB33_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -4177,22 +4372,24 @@ ; X86-BMI1BMI2-NEXT: .LBB33_2: ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %eax +; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB33_4 +; X86-BMI1BMI2-NEXT: jne .LBB33_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %eax, %ebx +; X86-BMI1BMI2-NEXT: jmp .LBB33_5 ; X86-BMI1BMI2-NEXT: .LBB33_4: +; X86-BMI1BMI2-NEXT: movl %eax, %ebp +; X86-BMI1BMI2-NEXT: .LBB33_5: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -4289,6 +4486,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB34_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -4298,15 +4496,17 @@ ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %ebp -; X86-NOBMI-NEXT: movl $-1, %ebx -; X86-NOBMI-NEXT: shrl %cl, %ebx +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB34_4 +; X86-NOBMI-NEXT: jne .LBB34_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: jmp .LBB34_5 ; X86-NOBMI-NEXT: .LBB34_4: +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: .LBB34_5: ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %ebp @@ -4337,6 +4537,7 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB34_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -4346,15 +4547,17 @@ ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shrl %cl, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB34_4 +; X86-BMI1NOTBM-NEXT: jne .LBB34_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: jmp .LBB34_5 ; X86-BMI1NOTBM-NEXT: .LBB34_4: +; X86-BMI1NOTBM-NEXT: movl %eax, %ebp +; X86-BMI1NOTBM-NEXT: .LBB34_5: ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %ebp @@ -4384,6 +4587,7 @@ ; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB34_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -4392,22 +4596,24 @@ ; X86-BMI1BMI2-NEXT: .LBB34_2: ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %eax +; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB34_4 +; X86-BMI1BMI2-NEXT: jne .LBB34_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %eax, %ebx +; X86-BMI1BMI2-NEXT: jmp .LBB34_5 ; X86-BMI1BMI2-NEXT: .LBB34_4: +; X86-BMI1BMI2-NEXT: movl %eax, %ebp +; X86-BMI1BMI2-NEXT: .LBB34_5: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -4502,6 +4708,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB35_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -4511,15 +4718,17 @@ ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %ebp -; X86-NOBMI-NEXT: movl $-1, %ebx -; X86-NOBMI-NEXT: shrl %cl, %ebx +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB35_4 +; X86-NOBMI-NEXT: jne .LBB35_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: jmp .LBB35_5 ; X86-NOBMI-NEXT: .LBB35_4: +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: .LBB35_5: ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %ebp @@ -4550,6 +4759,7 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB35_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -4559,15 +4769,17 @@ ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shrl %cl, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB35_4 +; X86-BMI1NOTBM-NEXT: jne .LBB35_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: jmp .LBB35_5 ; X86-BMI1NOTBM-NEXT: .LBB35_4: +; X86-BMI1NOTBM-NEXT: movl %eax, %ebp +; X86-BMI1NOTBM-NEXT: .LBB35_5: ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %ebp @@ -4597,6 +4809,7 @@ ; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB35_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -4605,22 +4818,24 @@ ; X86-BMI1BMI2-NEXT: .LBB35_2: ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %eax +; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB35_4 +; X86-BMI1BMI2-NEXT: jne .LBB35_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %eax, %ebx +; X86-BMI1BMI2-NEXT: jmp .LBB35_5 ; X86-BMI1BMI2-NEXT: .LBB35_4: +; X86-BMI1BMI2-NEXT: movl %eax, %ebp +; X86-BMI1BMI2-NEXT: .LBB35_5: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -4717,6 +4932,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB36_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -4726,15 +4942,17 @@ ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %ebp -; X86-NOBMI-NEXT: movl $-1, %ebx -; X86-NOBMI-NEXT: shrl %cl, %ebx +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB36_4 +; X86-NOBMI-NEXT: jne .LBB36_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: jmp .LBB36_5 ; X86-NOBMI-NEXT: .LBB36_4: +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: .LBB36_5: ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %ebp @@ -4764,6 +4982,7 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB36_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -4773,15 +4992,17 @@ ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shrl %cl, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB36_4 +; X86-BMI1NOTBM-NEXT: jne .LBB36_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: jmp .LBB36_5 ; X86-BMI1NOTBM-NEXT: .LBB36_4: +; X86-BMI1NOTBM-NEXT: movl %eax, %ebp +; X86-BMI1NOTBM-NEXT: .LBB36_5: ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %ebp @@ -4810,6 +5031,7 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB36_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -4818,22 +5040,24 @@ ; X86-BMI1BMI2-NEXT: .LBB36_2: ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %eax +; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB36_4 +; X86-BMI1BMI2-NEXT: jne .LBB36_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %eax, %ebx +; X86-BMI1BMI2-NEXT: jmp .LBB36_5 ; X86-BMI1BMI2-NEXT: .LBB36_4: +; X86-BMI1BMI2-NEXT: movl %eax, %ebp +; X86-BMI1BMI2-NEXT: .LBB36_5: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -4926,6 +5150,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB37_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -4935,15 +5160,17 @@ ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %ebx -; X86-NOBMI-NEXT: movl $-1, %ebp -; X86-NOBMI-NEXT: shrl %cl, %ebp +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: shrdl %cl, %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB37_4 +; X86-NOBMI-NEXT: jne .LBB37_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %ebp, %ebx -; X86-NOBMI-NEXT: xorl %ebp, %ebp +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: jmp .LBB37_5 ; X86-NOBMI-NEXT: .LBB37_4: +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: .LBB37_5: ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebp ; X86-NOBMI-NEXT: pushl %ebx @@ -4978,6 +5205,7 @@ ; X86-BMI1NOTBM-NEXT: movl %eax, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB37_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -4987,15 +5215,17 @@ ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shrl %cl, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB37_4 +; X86-BMI1NOTBM-NEXT: jne .LBB37_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp +; X86-BMI1NOTBM-NEXT: movl %eax, %ebp +; X86-BMI1NOTBM-NEXT: jmp .LBB37_5 ; X86-BMI1NOTBM-NEXT: .LBB37_4: +; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: .LBB37_5: ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %ebx @@ -5029,6 +5259,7 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi ; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB37_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -5037,22 +5268,24 @@ ; X86-BMI1BMI2-NEXT: .LBB37_2: ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %ebx -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl $-1, %ebx +; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %eax +; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB37_4 +; X86-BMI1BMI2-NEXT: jne .LBB37_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %eax, %ebp +; X86-BMI1BMI2-NEXT: jmp .LBB37_5 ; X86-BMI1BMI2-NEXT: .LBB37_4: +; X86-BMI1BMI2-NEXT: movl %eax, %ebx +; X86-BMI1BMI2-NEXT: .LBB37_5: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %ebp +; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi +; X86-BMI1BMI2-NEXT: andl %ebx, %esi +; X86-BMI1BMI2-NEXT: andl %ebp, %edi ; X86-BMI1BMI2-NEXT: subl $8, %esp ; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) ; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) @@ -5524,98 +5757,90 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_d0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movl %edx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: shrdl %cl, %edx, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edx, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB43_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB43_2: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: shldl %cl, %edi, %eax -; X86-NOBMI-NEXT: shll %cl, %edi +; X86-NOBMI-NEXT: shldl %cl, %eax, %esi +; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %edi, %ebx -; X86-NOBMI-NEXT: jne .LBB43_4 +; X86-NOBMI-NEXT: je .LBB43_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB43_4: -; X86-NOBMI-NEXT: movl %ebx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB43_6 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB43_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB43_6: -; X86-NOBMI-NEXT: shrdl %cl, %ebx, %esi +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB43_8 +; X86-NOBMI-NEXT: je .LBB43_8 ; X86-NOBMI-NEXT: # %bb.7: -; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB43_8: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_d0: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB43_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB43_2: ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB43_4 +; X86-BMI1NOTBM-NEXT: je .LBB43_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB43_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB43_6 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB43_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB43_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB43_8 +; X86-BMI1NOTBM-NEXT: je .LBB43_8 ; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB43_8: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_d0: @@ -5627,7 +5852,6 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB43_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -5637,21 +5861,22 @@ ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB43_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB43_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB43_6 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB43_6 ; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB43_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB43_8 +; X86-BMI1BMI2-NEXT: je .LBB43_8 ; X86-BMI1BMI2-NEXT: # %bb.7: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB43_8: @@ -5695,98 +5920,90 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_d1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movl %edx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: shrdl %cl, %edx, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edx, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB44_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB44_2: ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: shldl %cl, %edi, %eax -; X86-NOBMI-NEXT: shll %cl, %edi +; X86-NOBMI-NEXT: shldl %cl, %eax, %esi +; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %edi, %ebx -; X86-NOBMI-NEXT: jne .LBB44_4 +; X86-NOBMI-NEXT: je .LBB44_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB44_4: -; X86-NOBMI-NEXT: movl %ebx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB44_6 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB44_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB44_6: -; X86-NOBMI-NEXT: shrdl %cl, %ebx, %esi +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB44_8 +; X86-NOBMI-NEXT: je .LBB44_8 ; X86-NOBMI-NEXT: # %bb.7: -; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB44_8: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_d1_indexzext: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB44_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB44_2: ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB44_4 +; X86-BMI1NOTBM-NEXT: je .LBB44_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB44_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB44_6 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB44_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB44_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB44_8 +; X86-BMI1NOTBM-NEXT: je .LBB44_8 ; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB44_8: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_d1_indexzext: @@ -5798,7 +6015,6 @@ ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB44_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -5808,21 +6024,22 @@ ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB44_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB44_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB44_6 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB44_6 ; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB44_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB44_8 +; X86-BMI1BMI2-NEXT: je .LBB44_8 ; X86-BMI1BMI2-NEXT: # %bb.7: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB44_8: @@ -5871,100 +6088,92 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_d2_load: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl (%eax), %edi -; X86-NOBMI-NEXT: movl 4(%eax), %edx -; X86-NOBMI-NEXT: movl %edx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: shrdl %cl, %edx, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl (%edx), %eax +; X86-NOBMI-NEXT: movl 4(%edx), %edx +; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edx, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB45_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB45_2: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: shldl %cl, %edi, %eax -; X86-NOBMI-NEXT: shll %cl, %edi +; X86-NOBMI-NEXT: shldl %cl, %eax, %esi +; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %edi, %ebx -; X86-NOBMI-NEXT: jne .LBB45_4 +; X86-NOBMI-NEXT: je .LBB45_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB45_4: -; X86-NOBMI-NEXT: movl %ebx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB45_6 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB45_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB45_6: -; X86-NOBMI-NEXT: shrdl %cl, %ebx, %esi +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB45_8 +; X86-NOBMI-NEXT: je .LBB45_8 ; X86-NOBMI-NEXT: # %bb.7: -; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB45_8: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_d2_load: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1NOTBM-NEXT: movl (%edx), %eax +; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edx +; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB45_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB45_2: ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB45_4 +; X86-BMI1NOTBM-NEXT: je .LBB45_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB45_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB45_6 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB45_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB45_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB45_8 +; X86-BMI1NOTBM-NEXT: je .LBB45_8 ; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB45_8: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_d2_load: @@ -5977,7 +6186,6 @@ ; X86-BMI1BMI2-NEXT: movl 4(%edx), %edx ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB45_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -5987,21 +6195,22 @@ ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB45_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB45_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB45_6 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB45_6 ; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB45_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB45_8 +; X86-BMI1BMI2-NEXT: je .LBB45_8 ; X86-BMI1BMI2-NEXT: # %bb.7: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB45_8: @@ -6047,100 +6256,92 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_d3_load_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl (%eax), %edi -; X86-NOBMI-NEXT: movl 4(%eax), %edx -; X86-NOBMI-NEXT: movl %edx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: shrdl %cl, %edx, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl (%edx), %eax +; X86-NOBMI-NEXT: movl 4(%edx), %edx +; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edx, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB46_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB46_2: ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: shldl %cl, %edi, %eax -; X86-NOBMI-NEXT: shll %cl, %edi +; X86-NOBMI-NEXT: shldl %cl, %eax, %esi +; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %edi, %ebx -; X86-NOBMI-NEXT: jne .LBB46_4 +; X86-NOBMI-NEXT: je .LBB46_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB46_4: -; X86-NOBMI-NEXT: movl %ebx, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB46_6 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB46_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB46_6: -; X86-NOBMI-NEXT: shrdl %cl, %ebx, %esi +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB46_8 +; X86-NOBMI-NEXT: je .LBB46_8 ; X86-NOBMI-NEXT: # %bb.7: -; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB46_8: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_d3_load_indexzext: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1NOTBM-NEXT: movl (%edx), %eax +; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edx +; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB46_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB46_2: ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %esi +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB46_4 +; X86-BMI1NOTBM-NEXT: je .LBB46_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB46_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB46_6 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB46_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB46_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB46_8 +; X86-BMI1NOTBM-NEXT: je .LBB46_8 ; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB46_8: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_d3_load_indexzext: @@ -6153,7 +6354,6 @@ ; X86-BMI1BMI2-NEXT: movl 4(%edx), %edx ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi ; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB46_2 ; X86-BMI1BMI2-NEXT: # %bb.1: @@ -6163,21 +6363,22 @@ ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB46_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB46_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB46_6 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB46_6 ; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB46_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB46_8 +; X86-BMI1BMI2-NEXT: je .LBB46_8 ; X86-BMI1BMI2-NEXT: # %bb.7: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB46_8: @@ -6228,50 +6429,46 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_d5_skipextrauses: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: movl %edi, %edx ; X86-NOBMI-NEXT: movl %eax, %ecx -; X86-NOBMI-NEXT: shrl %cl, %esi -; X86-NOBMI-NEXT: shrdl %cl, %edx, %ebx -; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: shrdl %cl, %edi, %esi ; X86-NOBMI-NEXT: testb $32, %al ; X86-NOBMI-NEXT: je .LBB47_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %esi, %ebx -; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB47_2: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: shldl %cl, %ebx, %esi -; X86-NOBMI-NEXT: shll %cl, %ebx +; X86-NOBMI-NEXT: shldl %cl, %esi, %edx +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %ebx, %ebp -; X86-NOBMI-NEXT: jne .LBB47_4 +; X86-NOBMI-NEXT: je .LBB47_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %ebp +; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: .LBB47_4: -; X86-NOBMI-NEXT: movl %ebp, %esi -; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: movl %edx, %ebx +; X86-NOBMI-NEXT: shrl %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edi -; X86-NOBMI-NEXT: jne .LBB47_6 +; X86-NOBMI-NEXT: movl %ebx, %edi +; X86-NOBMI-NEXT: je .LBB47_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %ebx, %edx -; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: xorl %esi, %esi +; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB47_6: -; X86-NOBMI-NEXT: shrdl %cl, %ebp, %edx +; X86-NOBMI-NEXT: shrdl %cl, %edx, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: jne .LBB47_8 +; X86-NOBMI-NEXT: je .LBB47_8 ; X86-NOBMI-NEXT: # %bb.7: -; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: movl %ebx, %esi ; X86-NOBMI-NEXT: .LBB47_8: ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ecx @@ -6280,59 +6477,53 @@ ; X86-NOBMI-NEXT: addl $16, %esp ; X86-NOBMI-NEXT: movl %esi, %eax ; X86-NOBMI-NEXT: movl %edi, %edx -; X86-NOBMI-NEXT: addl $12, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: popl %ebx -; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_d5_skipextrauses: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: movl %edi, %edx ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %ebx -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx +; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB47_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB47_2: ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx +; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: jne .LBB47_4 +; X86-BMI1NOTBM-NEXT: je .LBB47_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebp +; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: .LBB47_4: -; X86-BMI1NOTBM-NEXT: movl %ebp, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: movl %edx, %ebx +; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB47_6 +; X86-BMI1NOTBM-NEXT: movl %ebx, %edi +; X86-BMI1NOTBM-NEXT: je .LBB47_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edx -; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB47_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %edx +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: jne .LBB47_8 +; X86-BMI1NOTBM-NEXT: je .LBB47_8 ; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi +; X86-BMI1NOTBM-NEXT: movl %ebx, %esi ; X86-BMI1NOTBM-NEXT: .LBB47_8: ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ecx @@ -6341,11 +6532,9 @@ ; X86-BMI1NOTBM-NEXT: addl $16, %esp ; X86-BMI1NOTBM-NEXT: movl %esi, %eax ; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bextr64_d5_skipextrauses: @@ -6353,48 +6542,48 @@ ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi ; X86-BMI1BMI2-NEXT: shrxl %eax, %edx, %edx -; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB47_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %edi +; X86-BMI1BMI2-NEXT: movl %edx, %esi ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB47_2: ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edi, %ebx +; X86-BMI1BMI2-NEXT: shldl %cl, %esi, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %esi, %esi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB47_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edx -; X86-BMI1BMI2-NEXT: movl $0, %ebx +; X86-BMI1BMI2-NEXT: movl %esi, %edx +; X86-BMI1BMI2-NEXT: movl $0, %esi ; X86-BMI1BMI2-NEXT: .LBB47_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: jne .LBB47_6 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %ebx +; X86-BMI1BMI2-NEXT: movl %ebx, %edi +; X86-BMI1BMI2-NEXT: je .LBB47_6 ; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %esi +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB47_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %ebx +; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: jne .LBB47_8 +; X86-BMI1BMI2-NEXT: je .LBB47_8 ; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi +; X86-BMI1BMI2-NEXT: movl %ebx, %esi ; X86-BMI1BMI2-NEXT: .LBB47_8: ; X86-BMI1BMI2-NEXT: subl $8, %esp ; X86-BMI1BMI2-NEXT: pushl %ecx ; X86-BMI1BMI2-NEXT: pushl %eax ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx +; X86-BMI1BMI2-NEXT: movl %esi, %eax +; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx Index: test/CodeGen/X86/extract-lowbits.ll =================================================================== --- test/CodeGen/X86/extract-lowbits.ll +++ test/CodeGen/X86/extract-lowbits.ll @@ -287,59 +287,74 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a0: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %esi, %edx +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB5_2 +; X86-NOBMI-NEXT: jne .LBB5_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: jmp .LBB5_3 ; X86-NOBMI-NEXT: .LBB5_2: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: .LBB5_3: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_a0: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB5_2 +; X86-BMI1NOTBM-NEXT: jne .LBB5_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB5_3 ; X86-BMI1NOTBM-NEXT: .LBB5_2: +; X86-BMI1NOTBM-NEXT: movl %esi, %edx +; X86-BMI1NOTBM-NEXT: .LBB5_3: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_a0: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %esi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %esi, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %esi, %esi ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB5_2 +; X86-BMI1BMI2-NEXT: jne .LBB5_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB5_3 ; X86-BMI1BMI2-NEXT: .LBB5_2: +; X86-BMI1BMI2-NEXT: movl %esi, %edx +; X86-BMI1BMI2-NEXT: .LBB5_3: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a0: @@ -371,59 +386,74 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a1_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %esi, %edx +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB6_2 +; X86-NOBMI-NEXT: jne .LBB6_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: jmp .LBB6_3 ; X86-NOBMI-NEXT: .LBB6_2: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: .LBB6_3: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_a1_indexzext: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB6_2 +; X86-BMI1NOTBM-NEXT: jne .LBB6_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB6_3 ; X86-BMI1NOTBM-NEXT: .LBB6_2: +; X86-BMI1NOTBM-NEXT: movl %esi, %edx +; X86-BMI1NOTBM-NEXT: .LBB6_3: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_a1_indexzext: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %esi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %esi, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %esi, %esi ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB6_2 +; X86-BMI1BMI2-NEXT: jne .LBB6_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB6_3 ; X86-BMI1BMI2-NEXT: .LBB6_2: +; X86-BMI1BMI2-NEXT: movl %esi, %edx +; X86-BMI1BMI2-NEXT: .LBB6_3: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a1_indexzext: @@ -458,68 +488,83 @@ define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a2_load: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %edi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %edi, %edx +; X86-NOBMI-NEXT: shll %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB7_2 +; X86-NOBMI-NEXT: jne .LBB7_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: jmp .LBB7_3 ; X86-NOBMI-NEXT: .LBB7_2: +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: .LBB7_3: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl 4(%esi), %edx ; X86-NOBMI-NEXT: andl (%esi), %eax ; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_a2_load: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %edi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB7_2 +; X86-BMI1NOTBM-NEXT: jne .LBB7_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB7_3 ; X86-BMI1NOTBM-NEXT: .LBB7_2: +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: .LBB7_3: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx ; X86-BMI1NOTBM-NEXT: andl (%esi), %eax ; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_a2_load: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %edi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %edi, %edi ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB7_2 +; X86-BMI1BMI2-NEXT: jne .LBB7_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %edi, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB7_3 ; X86-BMI1BMI2-NEXT: .LBB7_2: +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: .LBB7_3: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx ; X86-BMI1BMI2-NEXT: andl (%esi), %eax ; X86-BMI1BMI2-NEXT: popl %esi +; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a2_load: @@ -552,68 +597,83 @@ define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %edi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %edi, %edx +; X86-NOBMI-NEXT: shll %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB8_2 +; X86-NOBMI-NEXT: jne .LBB8_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: jmp .LBB8_3 ; X86-NOBMI-NEXT: .LBB8_2: +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: .LBB8_3: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl 4(%esi), %edx ; X86-NOBMI-NEXT: andl (%esi), %eax ; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %edi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB8_2 +; X86-BMI1NOTBM-NEXT: jne .LBB8_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB8_3 ; X86-BMI1NOTBM-NEXT: .LBB8_2: +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: .LBB8_3: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx ; X86-BMI1NOTBM-NEXT: andl (%esi), %eax ; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %edi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %edi, %edi ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB8_2 +; X86-BMI1BMI2-NEXT: jne .LBB8_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %edi, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB8_3 ; X86-BMI1BMI2-NEXT: .LBB8_2: +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: .LBB8_3: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx ; X86-BMI1BMI2-NEXT: andl (%esi), %eax ; X86-BMI1BMI2-NEXT: popl %esi +; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext: @@ -649,59 +709,74 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a4_commutative: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $1, %eax +; X86-NOBMI-NEXT: movl $1, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx -; X86-NOBMI-NEXT: shldl %cl, %eax, %edx -; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %esi, %edx +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB9_2 +; X86-NOBMI-NEXT: jne .LBB9_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: jmp .LBB9_3 ; X86-NOBMI-NEXT: .LBB9_2: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: .LBB9_3: ; X86-NOBMI-NEXT: addl $-1, %eax ; X86-NOBMI-NEXT: adcl $-1, %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_a4_commutative: ; X86-BMI1NOTBM: # %bb.0: +; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax +; X86-BMI1NOTBM-NEXT: movl $1, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edx +; X86-BMI1NOTBM-NEXT: shll %cl, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB9_2 +; X86-BMI1NOTBM-NEXT: jne .LBB9_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: jmp .LBB9_3 ; X86-BMI1NOTBM-NEXT: .LBB9_2: +; X86-BMI1NOTBM-NEXT: movl %esi, %edx +; X86-BMI1NOTBM-NEXT: .LBB9_3: ; X86-BMI1NOTBM-NEXT: addl $-1, %eax ; X86-BMI1NOTBM-NEXT: adcl $-1, %edx ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_a4_commutative: ; X86-BMI1BMI2: # %bb.0: +; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax +; X86-BMI1BMI2-NEXT: movl $1, %esi +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI1BMI2-NEXT: shldl %cl, %esi, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %esi, %esi ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB9_2 +; X86-BMI1BMI2-NEXT: jne .LBB9_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %eax +; X86-BMI1BMI2-NEXT: jmp .LBB9_3 ; X86-BMI1BMI2-NEXT: .LBB9_2: +; X86-BMI1BMI2-NEXT: movl %esi, %edx +; X86-BMI1BMI2-NEXT: .LBB9_3: ; X86-BMI1BMI2-NEXT: addl $-1, %eax ; X86-BMI1BMI2-NEXT: adcl $-1, %edx ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a4_commutative: @@ -1013,21 +1088,19 @@ ; ; X86-BMI1NOTBM-LABEL: bzhi64_b0: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB15_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB15_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b0: @@ -1096,21 +1169,19 @@ ; ; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB16_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB16_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext: @@ -1363,21 +1434,19 @@ ; ; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB19_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB19_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative: @@ -2005,25 +2074,25 @@ ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: shrl %cl, %edi -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi +; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB25_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB25_2: ; X86-NOBMI-NEXT: subl $8, %esp -; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: addl $4, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi @@ -2036,25 +2105,25 @@ ; X86-BMI1NOTBM-NEXT: pushl %eax ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi +; X86-BMI1NOTBM-NEXT: movl $-1, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB25_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB25_2: ; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi +; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: addl $4, %esp ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi @@ -2162,25 +2231,25 @@ ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: shrl %cl, %edi -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi +; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB26_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB26_2: ; X86-NOBMI-NEXT: subl $8, %esp -; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: addl $4, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi @@ -2193,25 +2262,25 @@ ; X86-BMI1NOTBM-NEXT: pushl %eax ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi +; X86-BMI1NOTBM-NEXT: movl $-1, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB26_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB26_2: ; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi +; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: addl $4, %esp ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi @@ -2472,26 +2541,26 @@ ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax +; X86-NOBMI-NEXT: shrdl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB28_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: movl %ebx, %edx ; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: .LBB28_2: -; X86-NOBMI-NEXT: movl (%edx), %esi -; X86-NOBMI-NEXT: andl %eax, %esi -; X86-NOBMI-NEXT: movl 4(%edx), %edi +; X86-NOBMI-NEXT: movl (%eax), %esi +; X86-NOBMI-NEXT: andl %edx, %esi +; X86-NOBMI-NEXT: movl 4(%eax), %edi ; X86-NOBMI-NEXT: andl %ebx, %edi ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx -; X86-NOBMI-NEXT: pushl %eax +; X86-NOBMI-NEXT: pushl %edx ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp ; X86-NOBMI-NEXT: movl %esi, %eax @@ -2506,26 +2575,26 @@ ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB28_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %ebx, %edx ; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: .LBB28_2: -; X86-BMI1NOTBM-NEXT: movl (%edx), %esi -; X86-BMI1NOTBM-NEXT: andl %eax, %esi -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi +; X86-BMI1NOTBM-NEXT: movl (%eax), %esi +; X86-BMI1NOTBM-NEXT: andl %edx, %esi +; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edi ; X86-BMI1NOTBM-NEXT: andl %ebx, %edi ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %eax +; X86-BMI1NOTBM-NEXT: pushl %edx ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp ; X86-BMI1NOTBM-NEXT: movl %esi, %eax @@ -2540,25 +2609,25 @@ ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax +; X86-BMI1BMI2-NEXT: movl $-1, %edx +; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %ebx +; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB28_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %ebx, %eax +; X86-BMI1BMI2-NEXT: movl %ebx, %edx ; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: .LBB28_2: -; X86-BMI1BMI2-NEXT: movl (%edx), %esi -; X86-BMI1BMI2-NEXT: andl %eax, %esi -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi +; X86-BMI1BMI2-NEXT: movl (%eax), %esi +; X86-BMI1BMI2-NEXT: andl %edx, %esi +; X86-BMI1BMI2-NEXT: movl 4(%eax), %edi ; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: subl $8, %esp ; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %eax +; X86-BMI1BMI2-NEXT: pushl %edx ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp ; X86-BMI1BMI2-NEXT: movl %esi, %eax @@ -2630,25 +2699,25 @@ ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: shrl %cl, %edi -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi +; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB29_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB29_2: ; X86-NOBMI-NEXT: subl $8, %esp -; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: addl $4, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi @@ -2661,25 +2730,25 @@ ; X86-BMI1NOTBM-NEXT: pushl %eax ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi +; X86-BMI1NOTBM-NEXT: movl $-1, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB29_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB29_2: ; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi +; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: addl $4, %esp ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi @@ -2998,80 +3067,72 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_d0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl %edx, %esi -; X86-NOBMI-NEXT: shll %cl, %esi -; X86-NOBMI-NEXT: shldl %cl, %edx, %eax +; X86-NOBMI-NEXT: movl %edx, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %edx, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: jne .LBB34_2 +; X86-NOBMI-NEXT: je .LBB34_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB34_2: -; X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB34_4 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB34_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %ebx -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB34_4: -; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB34_6 +; X86-NOBMI-NEXT: je .LBB34_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB34_6: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_d0: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax +; X86-BMI1NOTBM-NEXT: movl %edx, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB34_2 +; X86-BMI1NOTBM-NEXT: je .LBB34_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB34_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB34_4 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB34_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB34_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB34_6 +; X86-BMI1NOTBM-NEXT: je .LBB34_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB34_6: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_d0: @@ -3083,22 +3144,22 @@ ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB34_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB34_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB34_4 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB34_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB34_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB34_6 +; X86-BMI1BMI2-NEXT: je .LBB34_6 ; X86-BMI1BMI2-NEXT: # %bb.5: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB34_6: @@ -3135,80 +3196,72 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_d1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl %edx, %esi -; X86-NOBMI-NEXT: shll %cl, %esi -; X86-NOBMI-NEXT: shldl %cl, %edx, %eax +; X86-NOBMI-NEXT: movl %edx, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %edx, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: jne .LBB35_2 +; X86-NOBMI-NEXT: je .LBB35_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB35_2: -; X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB35_4 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB35_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %ebx -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB35_4: -; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB35_6 +; X86-NOBMI-NEXT: je .LBB35_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB35_6: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_d1_indexzext: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax +; X86-BMI1NOTBM-NEXT: movl %edx, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB35_2 +; X86-BMI1NOTBM-NEXT: je .LBB35_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB35_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB35_4 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB35_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB35_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB35_6 +; X86-BMI1NOTBM-NEXT: je .LBB35_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB35_6: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_d1_indexzext: @@ -3220,22 +3273,22 @@ ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB35_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB35_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB35_4 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB35_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB35_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB35_6 +; X86-BMI1BMI2-NEXT: je .LBB35_6 ; X86-BMI1BMI2-NEXT: # %bb.5: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB35_6: @@ -3275,82 +3328,74 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_d2_load: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %edx -; X86-NOBMI-NEXT: movl 4(%eax), %eax +; X86-NOBMI-NEXT: movl 4(%eax), %esi ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl %edx, %esi -; X86-NOBMI-NEXT: shll %cl, %esi -; X86-NOBMI-NEXT: shldl %cl, %edx, %eax +; X86-NOBMI-NEXT: movl %edx, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %edx, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: jne .LBB36_2 +; X86-NOBMI-NEXT: je .LBB36_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB36_2: -; X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB36_4 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB36_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %ebx -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB36_4: -; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB36_6 +; X86-NOBMI-NEXT: je .LBB36_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB36_6: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_d2_load: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: movl (%eax), %edx -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax +; X86-BMI1NOTBM-NEXT: movl 4(%eax), %esi ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax +; X86-BMI1NOTBM-NEXT: movl %edx, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB36_2 +; X86-BMI1NOTBM-NEXT: je .LBB36_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB36_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB36_4 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB36_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB36_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB36_6 +; X86-BMI1NOTBM-NEXT: je .LBB36_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB36_6: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_d2_load: @@ -3363,22 +3408,22 @@ ; X86-BMI1BMI2-NEXT: movl $64, %ecx ; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB36_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB36_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB36_4 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB36_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB36_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB36_6 +; X86-BMI1BMI2-NEXT: je .LBB36_6 ; X86-BMI1BMI2-NEXT: # %bb.5: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB36_6: @@ -3416,82 +3461,74 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %edx -; X86-NOBMI-NEXT: movl 4(%eax), %eax +; X86-NOBMI-NEXT: movl 4(%eax), %esi ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl %edx, %esi -; X86-NOBMI-NEXT: shll %cl, %esi -; X86-NOBMI-NEXT: shldl %cl, %edx, %eax +; X86-NOBMI-NEXT: movl %edx, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: shldl %cl, %edx, %esi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: jne .LBB37_2 +; X86-NOBMI-NEXT: je .LBB37_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edi +; X86-NOBMI-NEXT: movl %eax, %esi ; X86-NOBMI-NEXT: .LBB37_2: -; X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: xorl %ebx, %ebx +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: movl $0, %edx -; X86-NOBMI-NEXT: jne .LBB37_4 +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: je .LBB37_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %ebx -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB37_4: -; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx +; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax ; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: jne .LBB37_6 +; X86-NOBMI-NEXT: je .LBB37_6 ; X86-NOBMI-NEXT: # %bb.5: -; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: movl %edi, %eax ; X86-NOBMI-NEXT: .LBB37_6: ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi -; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %edi ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: movl (%eax), %edx -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax +; X86-BMI1NOTBM-NEXT: movl 4(%eax), %esi ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax +; X86-BMI1NOTBM-NEXT: movl %edx, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB37_2 +; X86-BMI1NOTBM-NEXT: je .LBB37_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi +; X86-BMI1NOTBM-NEXT: movl %eax, %esi ; X86-BMI1NOTBM-NEXT: .LBB37_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: shrl %cl, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB37_4 +; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: je .LBB37_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB37_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB37_6 +; X86-BMI1NOTBM-NEXT: je .LBB37_6 ; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax +; X86-BMI1NOTBM-NEXT: movl %edi, %eax ; X86-BMI1NOTBM-NEXT: .LBB37_6: ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext: @@ -3504,22 +3541,22 @@ ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx +; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB37_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi +; X86-BMI1BMI2-NEXT: movl %eax, %esi +; X86-BMI1BMI2-NEXT: movl $0, %eax ; X86-BMI1BMI2-NEXT: .LBB37_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB37_4 +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi +; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: je .LBB37_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB37_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB37_6 +; X86-BMI1BMI2-NEXT: je .LBB37_6 ; X86-BMI1BMI2-NEXT: # %bb.5: ; X86-BMI1BMI2-NEXT: movl %edi, %eax ; X86-BMI1BMI2-NEXT: .LBB37_6: Index: test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll +++ test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll @@ -9,11 +9,11 @@ ; SSE-LABEL: select_fcmp_one_f32: ; SSE: ## %bb.0: ; SSE-NEXT: ucomiss %xmm1, %xmm0 -; SSE-NEXT: jne LBB0_2 +; SSE-NEXT: je LBB0_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm3, %xmm2 +; SSE-NEXT: movaps %xmm2, %xmm3 ; SSE-NEXT: LBB0_2: -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: movaps %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_one_f32: @@ -30,11 +30,11 @@ ; SSE-LABEL: select_fcmp_one_f64: ; SSE: ## %bb.0: ; SSE-NEXT: ucomisd %xmm1, %xmm0 -; SSE-NEXT: jne LBB1_2 +; SSE-NEXT: je LBB1_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm3, %xmm2 +; SSE-NEXT: movaps %xmm2, %xmm3 ; SSE-NEXT: LBB1_2: -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: movaps %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_one_f64: @@ -51,19 +51,21 @@ ; SSE-LABEL: select_icmp_eq_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: je LBB2_2 +; SSE-NEXT: jne LBB2_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB2_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_eq_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: je LBB2_2 +; AVX-NEXT: jne LBB2_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB2_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp eq i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -74,19 +76,21 @@ ; SSE-LABEL: select_icmp_ne_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jne LBB3_2 +; SSE-NEXT: je LBB3_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB3_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_ne_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jne LBB3_2 +; AVX-NEXT: je LBB3_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB3_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp ne i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -97,19 +101,21 @@ ; SSE-LABEL: select_icmp_ugt_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: ja LBB4_2 +; SSE-NEXT: jbe LBB4_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB4_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_ugt_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: ja LBB4_2 +; AVX-NEXT: jbe LBB4_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB4_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp ugt i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -120,19 +126,21 @@ ; SSE-LABEL: select_icmp_uge_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jae LBB5_2 +; SSE-NEXT: jb LBB5_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB5_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_uge_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jae LBB5_2 +; AVX-NEXT: jb LBB5_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB5_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp uge i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -143,19 +151,21 @@ ; SSE-LABEL: select_icmp_ult_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jb LBB6_2 +; SSE-NEXT: jae LBB6_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB6_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_ult_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jb LBB6_2 +; AVX-NEXT: jae LBB6_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB6_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -166,19 +176,21 @@ ; SSE-LABEL: select_icmp_ule_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jbe LBB7_2 +; SSE-NEXT: ja LBB7_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB7_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_ule_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jbe LBB7_2 +; AVX-NEXT: ja LBB7_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB7_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp ule i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -189,19 +201,21 @@ ; SSE-LABEL: select_icmp_sgt_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jg LBB8_2 +; SSE-NEXT: jle LBB8_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB8_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_sgt_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jg LBB8_2 +; AVX-NEXT: jle LBB8_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB8_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp sgt i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -212,19 +226,21 @@ ; SSE-LABEL: select_icmp_sge_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jge LBB9_2 +; SSE-NEXT: jl LBB9_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB9_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_sge_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jge LBB9_2 +; AVX-NEXT: jl LBB9_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB9_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp sge i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -235,19 +251,21 @@ ; SSE-LABEL: select_icmp_slt_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jl LBB10_2 +; SSE-NEXT: jge LBB10_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB10_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_slt_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jl LBB10_2 +; AVX-NEXT: jge LBB10_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB10_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp slt i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -258,19 +276,21 @@ ; SSE-LABEL: select_icmp_sle_f32: ; SSE: ## %bb.0: ; SSE-NEXT: cmpq %rsi, %rdi -; SSE-NEXT: jle LBB11_2 +; SSE-NEXT: jg LBB11_2 ; SSE-NEXT: ## %bb.1: -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: LBB11_2: +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_icmp_sle_f32: ; AVX: ## %bb.0: ; AVX-NEXT: cmpq %rsi, %rdi -; AVX-NEXT: jle LBB11_2 +; AVX-NEXT: jg LBB11_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: LBB11_2: +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %1 = icmp sle i64 %a, %b %2 = select i1 %1, float %c, float %d @@ -281,12 +301,12 @@ ; CHECK-LABEL: select_icmp_sle_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: jle LBB12_1 -; CHECK-NEXT: ## %bb.2: +; CHECK-NEXT: jle LBB12_2 +; CHECK-NEXT: ## %bb.1: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq -; CHECK-NEXT: LBB12_1: +; CHECK-NEXT: LBB12_2: ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq Index: test/CodeGen/X86/fdiv-combine.ll =================================================================== --- test/CodeGen/X86/fdiv-combine.ll +++ test/CodeGen/X86/fdiv-combine.ll @@ -100,11 +100,11 @@ ; CHECK-LABEL: div_select_constant_fold: ; CHECK: # %bb.0: ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB6_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB6_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB6_1: +; CHECK-NEXT: .LBB6_2: ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00 @@ -116,11 +116,11 @@ ; CHECK-LABEL: div_select_constant_fold_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB7_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB7_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: jmp .LBB7_3 -; CHECK-NEXT: .LBB7_1: +; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: .LBB7_3: ; CHECK-NEXT: xorps %xmm1, %xmm1 Index: test/CodeGen/X86/fp-cvt.ll =================================================================== --- test/CodeGen/X86/fp-cvt.ll +++ test/CodeGen/X86/fp-cvt.ll @@ -455,12 +455,13 @@ ; X86-NEXT: fnstsw %ax ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf -; X86-NEXT: ja .LBB10_2 +; X86-NEXT: jbe .LBB10_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: fstp %st(1) +; X86-NEXT: fstp %st(0) ; X86-NEXT: fldz +; X86-NEXT: fxch %st(1) ; X86-NEXT: .LBB10_2: -; X86-NEXT: fstp %st(0) +; X86-NEXT: fstp %st(1) ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F @@ -549,12 +550,13 @@ ; X86-NEXT: fnstsw %ax ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf -; X86-NEXT: ja .LBB11_2 +; X86-NEXT: jbe .LBB11_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: fstp %st(1) +; X86-NEXT: fstp %st(0) ; X86-NEXT: fldz +; X86-NEXT: fxch %st(1) ; X86-NEXT: .LBB11_2: -; X86-NEXT: fstp %st(0) +; X86-NEXT: fstp %st(1) ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F Index: test/CodeGen/X86/fp128-cast.ll =================================================================== --- test/CodeGen/X86/fp128-cast.ll +++ test/CodeGen/X86/fp128-cast.ll @@ -897,12 +897,13 @@ ; X32-NEXT: testb $-128, {{[0-9]+}}(%esp) ; X32-NEXT: flds {{\.LCPI.*}} ; X32-NEXT: flds {{\.LCPI.*}} -; X32-NEXT: jne .LBB17_3 +; X32-NEXT: je .LBB17_3 ; X32-NEXT: # %bb.2: # %if.then -; X32-NEXT: fstp %st(1) +; X32-NEXT: fstp %st(0) ; X32-NEXT: fldz +; X32-NEXT: fxch %st(1) ; X32-NEXT: .LBB17_3: # %if.then -; X32-NEXT: fstp %st(0) +; X32-NEXT: fstp %st(1) ; X32-NEXT: subl $16, %esp ; X32-NEXT: leal {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, (%esp) Index: test/CodeGen/X86/fp128-compare.ll =================================================================== --- test/CodeGen/X86/fp128-compare.ll +++ test/CodeGen/X86/fp128-compare.ll @@ -123,12 +123,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: callq __gttf2 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jg .LBB6_2 +; CHECK-NEXT: jle .LBB6_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: .LBB6_2: # %entry Index: test/CodeGen/X86/fp128-i128.ll =================================================================== --- test/CodeGen/X86/fp128-i128.ll +++ test/CodeGen/X86/fp128-i128.ll @@ -190,20 +190,22 @@ ; SSE: # %bb.0: # %entry ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: cmpq $0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: jns .LBB3_2 +; SSE-NEXT: js .LBB3_2 ; SSE-NEXT: # %bb.1: # %entry -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: .LBB3_2: # %entry +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: TestI128_2: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX-NEXT: cmpq $0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: jns .LBB3_2 +; AVX-NEXT: js .LBB3_2 ; AVX-NEXT: # %bb.1: # %entry -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: .LBB3_2: # %entry +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq entry: %0 = bitcast fp128 %x to i128 @@ -407,20 +409,22 @@ ; SSE: # %bb.0: # %entry ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: cmpq $0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: jns .LBB8_2 +; SSE-NEXT: js .LBB8_2 ; SSE-NEXT: # %bb.1: # %entry -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: .LBB8_2: # %entry +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: TestComp: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX-NEXT: cmpq $0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: jns .LBB8_2 +; AVX-NEXT: js .LBB8_2 ; AVX-NEXT: # %bb.1: # %entry -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm0, %xmm1 ; AVX-NEXT: .LBB8_2: # %entry +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq entry: %0 = bitcast fp128 %x to i128 Index: test/CodeGen/X86/fp128-select.ll =================================================================== --- test/CodeGen/X86/fp128-select.ll +++ test/CodeGen/X86/fp128-select.ll @@ -12,12 +12,12 @@ ; MMX-LABEL: test_select: ; MMX: # %bb.0: ; MMX-NEXT: testl %edx, %edx -; MMX-NEXT: jne .LBB0_1 -; MMX-NEXT: # %bb.2: +; MMX-NEXT: jne .LBB0_2 +; MMX-NEXT: # %bb.1: ; MMX-NEXT: movaps {{.*}}(%rip), %xmm0 ; MMX-NEXT: movaps %xmm0, (%rsi) ; MMX-NEXT: retq -; MMX-NEXT: .LBB0_1: +; MMX-NEXT: .LBB0_2: ; MMX-NEXT: movups (%rdi), %xmm0 ; MMX-NEXT: movaps %xmm0, (%rsi) ; MMX-NEXT: retq Index: test/CodeGen/X86/half.ll =================================================================== --- test/CodeGen/X86/half.ll +++ test/CodeGen/X86/half.ll @@ -320,11 +320,11 @@ ; CHECK-LIBCALL-NEXT: pushq %rbx ; CHECK-LIBCALL-NEXT: movq %rsi, %rbx ; CHECK-LIBCALL-NEXT: testq %rdi, %rdi -; CHECK-LIBCALL-NEXT: js .LBB10_1 -; CHECK-LIBCALL-NEXT: # %bb.2: +; CHECK-LIBCALL-NEXT: js .LBB10_2 +; CHECK-LIBCALL-NEXT: # %bb.1: ; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 ; CHECK-LIBCALL-NEXT: jmp .LBB10_3 -; CHECK-LIBCALL-NEXT: .LBB10_1: +; CHECK-LIBCALL-NEXT: .LBB10_2: ; CHECK-LIBCALL-NEXT: movq %rdi, %rax ; CHECK-LIBCALL-NEXT: shrq %rax ; CHECK-LIBCALL-NEXT: andl $1, %edi @@ -340,11 +340,11 @@ ; BWON-F16C-LABEL: test_uitofp_i64: ; BWON-F16C: # %bb.0: ; BWON-F16C-NEXT: testq %rdi, %rdi -; BWON-F16C-NEXT: js .LBB10_1 -; BWON-F16C-NEXT: # %bb.2: +; BWON-F16C-NEXT: js .LBB10_2 +; BWON-F16C-NEXT: # %bb.1: ; BWON-F16C-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 ; BWON-F16C-NEXT: jmp .LBB10_3 -; BWON-F16C-NEXT: .LBB10_1: +; BWON-F16C-NEXT: .LBB10_2: ; BWON-F16C-NEXT: movq %rdi, %rax ; BWON-F16C-NEXT: shrq %rax ; BWON-F16C-NEXT: andl $1, %edi Index: test/CodeGen/X86/i386-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/i386-shrink-wrapping.ll +++ test/CodeGen/X86/i386-shrink-wrapping.ll @@ -42,10 +42,10 @@ ; CHECK: [[FOREND_LABEL]]: ; CHECK-NEXT: xorl ; CHECK-NEXT: cmpb $0, _d -; CHECK-NEXT: movl $0, %edx -; CHECK-NEXT: jne [[CALL_LABEL:LBB[0-9_]+]] +; CHECK-NEXT: movb $6, %dl +; CHECK-NEXT: je [[CALL_LABEL:LBB[0-9_]+]] ; -; CHECK: movb $6, %dl +; CHECK: movl $0, %edx ; ; CHECK: [[CALL_LABEL]] ; Index: test/CodeGen/X86/i686-win-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/i686-win-shrink-wrapping.ll +++ test/CodeGen/X86/i686-win-shrink-wrapping.ll @@ -16,7 +16,7 @@ ; This is the end of the entry block. ; The prologue should have happened before that point because past ; this point, EFLAGS is live. -; CHECK: jg +; CHECK: jle define x86_thiscallcc void @stackRealignment(%struct.S* %this) { entry: %data = alloca [1 x i32], align 4 Index: test/CodeGen/X86/legalize-shift-64.ll =================================================================== --- test/CodeGen/X86/legalize-shift-64.ll +++ test/CodeGen/X86/legalize-shift-64.ll @@ -89,32 +89,32 @@ ; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: movl %edx, %edi ; CHECK-NEXT: shll %cl, %edi -; CHECK-NEXT: shldl %cl, %ebx, %esi +; CHECK-NEXT: shldl %cl, %edx, %esi ; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: je .LBB4_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %ebx, %ebp ; CHECK-NEXT: movb %ch, %cl -; CHECK-NEXT: shll %cl, %ebx -; CHECK-NEXT: shldl %cl, %edx, %ebp +; CHECK-NEXT: shll %cl, %ebp +; CHECK-NEXT: shldl %cl, %ebx, %edx ; CHECK-NEXT: testb $32, %ch ; CHECK-NEXT: je .LBB4_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: movl %ebx, %ebp -; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: movl %ebp, %edx +; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: movl %ebp, 12(%eax) -; CHECK-NEXT: movl %ebx, 8(%eax) +; CHECK-NEXT: movl %edx, 12(%eax) +; CHECK-NEXT: movl %ebp, 8(%eax) ; CHECK-NEXT: movl %esi, 4(%eax) ; CHECK-NEXT: movl %edi, (%eax) ; CHECK-NEXT: popl %esi Index: test/CodeGen/X86/machine-trace-metrics-crash.ll =================================================================== --- test/CodeGen/X86/machine-trace-metrics-crash.ll +++ test/CodeGen/X86/machine-trace-metrics-crash.ll @@ -22,18 +22,19 @@ ; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: callq foo -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload ; CHECK-NEXT: # xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: mulss %xmm0, %xmm2 -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: addss %xmm1, %xmm0 -; CHECK-NEXT: addss %xmm2, %xmm0 -; CHECK-NEXT: movss %xmm0, (%rax) +; CHECK-NEXT: mulss %xmm1, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: addss %xmm0, %xmm1 +; CHECK-NEXT: addss %xmm2, %xmm1 +; CHECK-NEXT: movss %xmm1, (%rax) +; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne .LBB0_5 +; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: # %bb.4: # %if.end -; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: movaps %xmm0, %xmm1 ; CHECK-NEXT: .LBB0_5: # %if.end ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: addss %xmm0, %xmm0 Index: test/CodeGen/X86/mul-constant-result.ll =================================================================== --- test/CodeGen/X86/mul-constant-result.ll +++ test/CodeGen/X86/mul-constant-result.ll @@ -9,22 +9,18 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { ; X86-LABEL: mult: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl $1, %eax -; X86-NEXT: movl $1, %esi -; X86-NEXT: jg .LBB0_2 +; X86-NEXT: movl %edx, %eax +; X86-NEXT: jle .LBB0_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl $1, %eax ; X86-NEXT: .LBB0_2: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: testl %edx, %edx -; X86-NEXT: je .LBB0_4 +; X86-NEXT: jne .LBB0_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl $1, %eax ; X86-NEXT: .LBB0_4: ; X86-NEXT: decl %ecx ; X86-NEXT: cmpl $31, %ecx @@ -33,86 +29,50 @@ ; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4) ; X86-NEXT: .LBB0_6: ; X86-NEXT: addl %eax, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_39: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: .LBB0_40: -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_7: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_8: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: shll $2, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_9: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,4), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_10: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: addl %eax, %eax ; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_11: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (,%eax,8), %ecx ; X86-NEXT: jmp .LBB0_12 ; X86-NEXT: .LBB0_13: ; X86-NEXT: shll $3, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_14: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,8), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_15: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: addl %eax, %eax ; X86-NEXT: leal (%eax,%eax,4), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_16: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,4), %ecx ; X86-NEXT: leal (%eax,%ecx,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_17: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: shll $2, %eax ; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_18: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,2), %ecx ; X86-NEXT: leal (%eax,%ecx,4), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_19: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $4, %ecx ; X86-NEXT: subl %eax, %ecx @@ -120,111 +80,69 @@ ; X86-NEXT: .LBB0_21: ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_22: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: shll $4, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_23: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $4, %ecx ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_24: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: addl %eax, %eax ; X86-NEXT: leal (%eax,%eax,8), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_25: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%eax,%ecx,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_26: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: shll $2, %eax ; X86-NEXT: leal (%eax,%eax,4), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_27: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,4), %ecx ; X86-NEXT: leal (%eax,%ecx,4), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_28: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,4), %ecx ; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_29: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,2), %ecx ; X86-NEXT: shll $3, %ecx ; X86-NEXT: jmp .LBB0_12 ; X86-NEXT: .LBB0_30: ; X86-NEXT: shll $3, %eax ; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_31: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: leal (%eax,%eax,4), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_32: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,4), %ecx ; X86-NEXT: leal (%ecx,%ecx,4), %ecx ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_33: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,8), %eax ; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_34: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_35: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx ; X86-NEXT: addl %eax, %ecx ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_36: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $5, %ecx ; X86-NEXT: subl %eax, %ecx @@ -235,14 +153,9 @@ ; X86-NEXT: .LBB0_12: ; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; X86-NEXT: .LBB0_38: -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: shll $5, %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-HSW-LABEL: mult: Index: test/CodeGen/X86/pr15981.ll =================================================================== --- test/CodeGen/X86/pr15981.ll +++ test/CodeGen/X86/pr15981.ll @@ -31,11 +31,12 @@ define void @fn2() { ; X86-LABEL: fn2: ; X86: # %bb.0: -; X86-NEXT: movl b, %eax +; X86-NEXT: movl b, %ecx +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: decl a -; X86-NEXT: jne .LBB1_2 +; X86-NEXT: je .LBB1_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: .LBB1_2: ; X86-NEXT: movl %eax, c ; X86-NEXT: retl Index: test/CodeGen/X86/pr32345.ll =================================================================== --- test/CodeGen/X86/pr32345.ll +++ test/CodeGen/X86/pr32345.ll @@ -77,14 +77,17 @@ ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; 6860-NEXT: shrdl %cl, %edi, %esi ; 6860-NEXT: testb $32, %bl -; 6860-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: jne .LBB0_2 ; 6860-NEXT: # %bb.1: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 6860-NEXT: jmp .LBB0_3 ; 6860-NEXT: .LBB0_2: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 6860-NEXT: .LBB0_3: # %bb +; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; 6860-NEXT: movb %al, %cl ; 6860-NEXT: # implicit-def: $eax ; 6860-NEXT: movb %cl, (%eax) @@ -132,11 +135,11 @@ ; 686-NEXT: xorl %edx, %edx ; 686-NEXT: shrdl %cl, %edx, %eax ; 686-NEXT: testb $32, %cl -; 686-NEXT: jne .LBB0_2 +; 686-NEXT: je .LBB0_2 ; 686-NEXT: # %bb.1: # %bb -; 686-NEXT: movl %eax, %edx +; 686-NEXT: xorl %eax, %eax ; 686-NEXT: .LBB0_2: # %bb -; 686-NEXT: movb %dl, (%eax) +; 686-NEXT: movb %al, (%eax) ; 686-NEXT: movl %ebp, %esp ; 686-NEXT: popl %ebp ; 686-NEXT: .cfi_def_cfa %esp, 4 Index: test/CodeGen/X86/pr5145.ll =================================================================== --- test/CodeGen/X86/pr5145.ll +++ test/CodeGen/X86/pr5145.ll @@ -10,11 +10,11 @@ ; CHECK-NEXT: .LBB0_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpb $4, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: jg .LBB0_3 +; CHECK-NEXT: movb $5, %cl +; CHECK-NEXT: jle .LBB0_3 ; CHECK-NEXT: # %bb.2: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb $5, %cl +; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: .LBB0_3: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip) @@ -25,11 +25,11 @@ ; CHECK-NEXT: .LBB0_5: # %atomicrmw.start2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpb $7, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: jl .LBB0_7 +; CHECK-NEXT: movb $6, %cl +; CHECK-NEXT: jge .LBB0_7 ; CHECK-NEXT: # %bb.6: # %atomicrmw.start2 ; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: movb $6, %cl +; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: .LBB0_7: # %atomicrmw.start2 ; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip) @@ -40,11 +40,11 @@ ; CHECK-NEXT: .LBB0_9: # %atomicrmw.start8 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpb $7, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: ja .LBB0_11 +; CHECK-NEXT: movb $7, %cl +; CHECK-NEXT: jbe .LBB0_11 ; CHECK-NEXT: # %bb.10: # %atomicrmw.start8 ; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1 -; CHECK-NEXT: movb $7, %cl +; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: .LBB0_11: # %atomicrmw.start8 ; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1 ; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip) @@ -55,11 +55,11 @@ ; CHECK-NEXT: .LBB0_13: # %atomicrmw.start14 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpb $9, %al -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: jb .LBB0_15 +; CHECK-NEXT: movb $8, %cl +; CHECK-NEXT: jae .LBB0_15 ; CHECK-NEXT: # %bb.14: # %atomicrmw.start14 ; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movb $8, %cl +; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: .LBB0_15: # %atomicrmw.start14 ; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip) Index: test/CodeGen/X86/pseudo_cmov_lower2.ll =================================================================== --- test/CodeGen/X86/pseudo_cmov_lower2.ll +++ test/CodeGen/X86/pseudo_cmov_lower2.ll @@ -12,11 +12,11 @@ ; CHECK-NEXT: xorps %xmm3, %xmm3 ; CHECK-NEXT: ucomiss %xmm3, %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: jae .LBB0_1 -; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: jae .LBB0_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addsd %xmm2, %xmm0 ; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: .LBB0_2: # %entry ; CHECK-NEXT: addsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: movapd %xmm1, %xmm2 @@ -47,13 +47,13 @@ ; CHECK-NEXT: xorps %xmm3, %xmm3 ; CHECK-NEXT: ucomiss %xmm3, %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: jae .LBB1_1 -; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: jae .LBB1_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addsd %xmm0, %xmm2 ; CHECK-NEXT: movapd %xmm2, %xmm0 ; CHECK-NEXT: movapd %xmm2, %xmm1 ; CHECK-NEXT: jmp .LBB1_3 -; CHECK-NEXT: .LBB1_1: +; CHECK-NEXT: .LBB1_2: # %entry ; CHECK-NEXT: addsd %xmm1, %xmm0 ; CHECK-NEXT: .LBB1_3: # %entry ; CHECK-NEXT: subsd %xmm1, %xmm0 @@ -81,12 +81,14 @@ ; CHECK-LABEL: foo3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: js .LBB2_2 +; CHECK-NEXT: movapd %xmm2, %xmm3 +; CHECK-NEXT: jns .LBB2_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: movapd %xmm2, %xmm1 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: movapd %xmm1, %xmm2 +; CHECK-NEXT: movapd %xmm0, %xmm3 ; CHECK-NEXT: .LBB2_2: # %entry -; CHECK-NEXT: divsd %xmm1, %xmm0 +; CHECK-NEXT: divsd %xmm2, %xmm3 +; CHECK-NEXT: movapd %xmm3, %xmm0 ; CHECK-NEXT: retq double %p4, double %p5) nounwind { entry: @@ -112,12 +114,14 @@ ; CHECK-LABEL: foo4: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: js .LBB3_2 +; CHECK-NEXT: movapd %xmm2, %xmm3 +; CHECK-NEXT: jns .LBB3_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: movapd %xmm2, %xmm1 -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: movapd %xmm1, %xmm2 +; CHECK-NEXT: movapd %xmm0, %xmm3 ; CHECK-NEXT: .LBB3_2: # %entry -; CHECK-NEXT: divsd %xmm1, %xmm0 +; CHECK-NEXT: divsd %xmm2, %xmm3 +; CHECK-NEXT: movapd %xmm3, %xmm0 ; CHECK-NEXT: retq double %p4, double %p5) nounwind { entry: Index: test/CodeGen/X86/rotate.ll =================================================================== --- test/CodeGen/X86/rotate.ll +++ test/CodeGen/X86/rotate.ll @@ -5,16 +5,18 @@ define i64 @rotl64(i64 %A, i8 %Amt) nounwind { ; X86-LABEL: rotl64: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %esi, %eax ; X86-NEXT: shll %cl, %eax -; X86-NEXT: movl %edi, %edx +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: testb $32, %cl ; X86-NEXT: je .LBB0_2 ; X86-NEXT: # %bb.1: @@ -23,21 +25,24 @@ ; X86-NEXT: .LBB0_2: ; X86-NEXT: movb $64, %ch ; X86-NEXT: subb %cl, %ch -; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl %ebx, %ebp ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, %ebx -; X86-NEXT: shrdl %cl, %edi, %esi +; X86-NEXT: shrl %cl, %ebp +; X86-NEXT: shrdl %cl, %ebx, %esi ; X86-NEXT: testb $32, %ch -; X86-NEXT: je .LBB0_4 +; X86-NEXT: jne .LBB0_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: jmp .LBB0_5 ; X86-NEXT: .LBB0_4: -; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: .LBB0_5: +; X86-NEXT: orl %edi, %edx ; X86-NEXT: orl %esi, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: rotl64: @@ -59,16 +64,18 @@ define i64 @rotr64(i64 %A, i8 %Amt) nounwind { ; X86-LABEL: rotr64: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %edx ; X86-NEXT: shrl %cl, %edx -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: shrdl %cl, %esi, %eax +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: testb $32, %cl ; X86-NEXT: je .LBB1_2 ; X86-NEXT: # %bb.1: @@ -77,21 +84,24 @@ ; X86-NEXT: .LBB1_2: ; X86-NEXT: movb $64, %ch ; X86-NEXT: subb %cl, %ch -; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl %ebx, %ebp ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: shll %cl, %ebp +; X86-NEXT: shldl %cl, %ebx, %esi ; X86-NEXT: testb $32, %ch -; X86-NEXT: je .LBB1_4 +; X86-NEXT: jne .LBB1_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: jmp .LBB1_5 ; X86-NEXT: .LBB1_4: +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: .LBB1_5: ; X86-NEXT: orl %esi, %edx -; X86-NEXT: orl %ebx, %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: rotr64: @@ -653,29 +663,28 @@ ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: shll %cl, %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: movl $0, %ebx -; X86-NEXT: jne .LBB28_2 +; X86-NEXT: je .LBB28_2 ; X86-NEXT: # %bb.1: # %entry -; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl %esi, %esi ; X86-NEXT: .LBB28_2: # %entry ; X86-NEXT: movl $64, %edx ; X86-NEXT: subl %ecx, %edx -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %edi, %ebx ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: shrdl %cl, %edi, %esi +; X86-NEXT: shrl %cl, %ebx +; X86-NEXT: shrdl %cl, %edi, %eax ; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB28_4 +; X86-NEXT: je .LBB28_4 ; X86-NEXT: # %bb.3: # %entry -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: .LBB28_4: # %entry -; X86-NEXT: orl %ebx, %eax +; X86-NEXT: orl %esi, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi Index: test/CodeGen/X86/rotate4.ll =================================================================== --- test/CodeGen/X86/rotate4.ll +++ test/CodeGen/X86/rotate4.ll @@ -56,22 +56,26 @@ define i64 @rotate_left_64(i64 %a, i64 %b) { ; X86-LABEL: rotate_left_64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %ebx ; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi ; X86-NEXT: .cfi_def_cfa_offset 16 -; X86-NEXT: .cfi_offset %esi, -16 -; X86-NEXT: .cfi_offset %edi, -12 -; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %esi, %eax ; X86-NEXT: shll %cl, %eax -; X86-NEXT: movl %edi, %edx +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: testb $32, %cl ; X86-NEXT: je .LBB2_2 ; X86-NEXT: # %bb.1: @@ -79,22 +83,26 @@ ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: .LBB2_2: ; X86-NEXT: negl %ecx -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: shrl %cl, %ebx -; X86-NEXT: shrdl %cl, %edi, %esi +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: shrl %cl, %ebp +; X86-NEXT: shrdl %cl, %ebx, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB2_4 +; X86-NEXT: jne .LBB2_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: jmp .LBB2_5 ; X86-NEXT: .LBB2_4: -; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: .LBB2_5: +; X86-NEXT: orl %edi, %edx ; X86-NEXT: orl %esi, %eax ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: popl %edi -; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; @@ -117,22 +125,26 @@ define i64 @rotate_right_64(i64 %a, i64 %b) { ; X86-LABEL: rotate_right_64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %ebx ; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi ; X86-NEXT: .cfi_def_cfa_offset 16 -; X86-NEXT: .cfi_offset %esi, -16 -; X86-NEXT: .cfi_offset %edi, -12 -; X86-NEXT: .cfi_offset %ebx, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %esi, %edx ; X86-NEXT: shrl %cl, %edx -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: shrdl %cl, %esi, %eax +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: testb $32, %cl ; X86-NEXT: je .LBB3_2 ; X86-NEXT: # %bb.1: @@ -140,22 +152,26 @@ ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: .LBB3_2: ; X86-NEXT: negl %ecx -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: shll %cl, %ebp +; X86-NEXT: shldl %cl, %ebx, %esi ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB3_4 +; X86-NEXT: jne .LBB3_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: jmp .LBB3_5 ; X86-NEXT: .LBB3_4: +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: .LBB3_5: ; X86-NEXT: orl %esi, %edx -; X86-NEXT: orl %ebx, %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: popl %edi -; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; @@ -245,11 +261,12 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl 4(%eax), %ebx +; X86-NEXT: movl 4(%eax), %eax ; X86-NEXT: movl %edx, %esi ; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl %ebx, %edi +; X86-NEXT: movl %eax, %edi ; X86-NEXT: shldl %cl, %edx, %edi +; X86-NEXT: xorl %ebx, %ebx ; X86-NEXT: testb $32, %cl ; X86-NEXT: je .LBB6_2 ; X86-NEXT: # %bb.1: @@ -257,17 +274,20 @@ ; X86-NEXT: xorl %esi, %esi ; X86-NEXT: .LBB6_2: ; X86-NEXT: negl %ecx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: shrl %cl, %ebp -; X86-NEXT: shrdl %cl, %ebx, %edx +; X86-NEXT: shrdl %cl, %eax, %edx ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB6_4 +; X86-NEXT: jne .LBB6_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: xorl %ebp, %ebp +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: jmp .LBB6_5 ; X86-NEXT: .LBB6_4: -; X86-NEXT: orl %ebp, %edi +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: .LBB6_5: +; X86-NEXT: orl %ebx, %edi ; X86-NEXT: orl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: popl %esi @@ -313,13 +333,14 @@ ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %ebx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: movl 4(%edx), %edx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: shrl %cl, %esi -; X86-NEXT: movl %ebx, %edi +; X86-NEXT: movl %eax, %edi ; X86-NEXT: shrdl %cl, %edx, %edi +; X86-NEXT: xorl %ebx, %ebx ; X86-NEXT: testb $32, %cl ; X86-NEXT: je .LBB7_2 ; X86-NEXT: # %bb.1: @@ -327,17 +348,20 @@ ; X86-NEXT: xorl %esi, %esi ; X86-NEXT: .LBB7_2: ; X86-NEXT: negl %ecx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: shll %cl, %ebp -; X86-NEXT: shldl %cl, %ebx, %edx +; X86-NEXT: shldl %cl, %eax, %edx ; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB7_4 +; X86-NEXT: jne .LBB7_4 ; X86-NEXT: # %bb.3: -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: xorl %ebp, %ebp +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: jmp .LBB7_5 ; X86-NEXT: .LBB7_4: +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: .LBB7_5: ; X86-NEXT: orl %edx, %esi -; X86-NEXT: orl %ebp, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edi, (%eax) ; X86-NEXT: popl %esi Index: test/CodeGen/X86/sat-add.ll =================================================================== --- test/CodeGen/X86/sat-add.ll +++ test/CodeGen/X86/sat-add.ll @@ -10,14 +10,13 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) { ; ANY-LABEL: unsigned_sat_constant_i8_using_min: ; ANY: # %bb.0: -; ANY-NEXT: movl %edi, %eax -; ANY-NEXT: cmpb $-43, %al -; ANY-NEXT: jb .LBB0_2 -; ANY-NEXT: # %bb.1: +; ANY-NEXT: cmpb $-43, %dil ; ANY-NEXT: movb $-43, %al +; ANY-NEXT: jae .LBB0_2 +; ANY-NEXT: # %bb.1: +; ANY-NEXT: movl %edi, %eax ; ANY-NEXT: .LBB0_2: ; ANY-NEXT: addb $42, %al -; ANY-NEXT: # kill: def $al killed $al killed $eax ; ANY-NEXT: retq %c = icmp ult i8 %x, -43 %s = select i1 %c, i8 %x, i8 -43 @@ -28,12 +27,13 @@ define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) { ; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum: ; ANY: # %bb.0: -; ANY-NEXT: addb $42, %dil -; ANY-NEXT: movb $-1, %al -; ANY-NEXT: jb .LBB1_2 -; ANY-NEXT: # %bb.1: ; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: addb $42, %al +; ANY-NEXT: jae .LBB1_2 +; ANY-NEXT: # %bb.1: +; ANY-NEXT: movb $-1, %al ; ANY-NEXT: .LBB1_2: +; ANY-NEXT: # kill: def $al killed $al killed $eax ; ANY-NEXT: retq %a = add i8 %x, 42 %c = icmp ugt i8 %x, %a @@ -44,12 +44,13 @@ define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) { ; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval: ; ANY: # %bb.0: -; ANY-NEXT: addb $42, %dil -; ANY-NEXT: movb $-1, %al -; ANY-NEXT: jb .LBB2_2 -; ANY-NEXT: # %bb.1: ; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: addb $42, %al +; ANY-NEXT: jae .LBB2_2 +; ANY-NEXT: # %bb.1: +; ANY-NEXT: movb $-1, %al ; ANY-NEXT: .LBB2_2: +; ANY-NEXT: # kill: def $al killed $al killed $eax ; ANY-NEXT: retq %a = add i8 %x, 42 %c = icmp ugt i8 %x, -43 @@ -183,16 +184,14 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) { ; ANY-LABEL: unsigned_sat_variable_i8_using_min: ; ANY: # %bb.0: -; ANY-NEXT: movl %edi, %eax -; ANY-NEXT: movl %esi, %ecx -; ANY-NEXT: notb %cl -; ANY-NEXT: cmpb %cl, %al -; ANY-NEXT: jb .LBB12_2 +; ANY-NEXT: movl %esi, %eax +; ANY-NEXT: notb %al +; ANY-NEXT: cmpb %al, %dil +; ANY-NEXT: jae .LBB12_2 ; ANY-NEXT: # %bb.1: -; ANY-NEXT: movl %ecx, %eax +; ANY-NEXT: movl %edi, %eax ; ANY-NEXT: .LBB12_2: ; ANY-NEXT: addb %sil, %al -; ANY-NEXT: # kill: def $al killed $al killed $eax ; ANY-NEXT: retq %noty = xor i8 %y, -1 %c = icmp ult i8 %x, %noty @@ -204,12 +203,13 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) { ; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum: ; ANY: # %bb.0: -; ANY-NEXT: addb %sil, %dil -; ANY-NEXT: movb $-1, %al -; ANY-NEXT: jb .LBB13_2 -; ANY-NEXT: # %bb.1: ; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: addb %sil, %al +; ANY-NEXT: jae .LBB13_2 +; ANY-NEXT: # %bb.1: +; ANY-NEXT: movb $-1, %al ; ANY-NEXT: .LBB13_2: +; ANY-NEXT: # kill: def $al killed $al killed $eax ; ANY-NEXT: retq %a = add i8 %x, %y %c = icmp ugt i8 %x, %a @@ -220,15 +220,18 @@ define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) { ; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval: ; ANY: # %bb.0: -; ANY-NEXT: movl %esi, %eax -; ANY-NEXT: notb %al -; ANY-NEXT: cmpb %al, %dil -; ANY-NEXT: movb $-1, %al +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: movl %esi, %ecx +; ANY-NEXT: notb %cl +; ANY-NEXT: cmpb %cl, %al ; ANY-NEXT: ja .LBB14_2 ; ANY-NEXT: # %bb.1: -; ANY-NEXT: addb %sil, %dil -; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: addb %sil, %al +; ANY-NEXT: # kill: def $al killed $al killed $eax +; ANY-NEXT: retq ; ANY-NEXT: .LBB14_2: +; ANY-NEXT: movb $-1, %al +; ANY-NEXT: # kill: def $al killed $al killed $eax ; ANY-NEXT: retq %noty = xor i8 %y, -1 %a = add i8 %x, %y Index: test/CodeGen/X86/scalar-fp-to-i64.ll =================================================================== --- test/CodeGen/X86/scalar-fp-to-i64.ll +++ test/CodeGen/X86/scalar-fp-to-i64.ll @@ -281,12 +281,13 @@ ; X87_WIN-NEXT: fnstsw %ax ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_WIN-NEXT: sahf -; X87_WIN-NEXT: ja LBB0_2 +; X87_WIN-NEXT: jbe LBB0_2 ; X87_WIN-NEXT: # %bb.1: -; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fstp %st(0) ; X87_WIN-NEXT: fldz +; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: LBB0_2: -; X87_WIN-NEXT: fstp %st(0) +; X87_WIN-NEXT: fstp %st(1) ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F @@ -315,12 +316,13 @@ ; X87_LIN-NEXT: fnstsw %ax ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_LIN-NEXT: sahf -; X87_LIN-NEXT: ja .LBB0_2 +; X87_LIN-NEXT: jbe .LBB0_2 ; X87_LIN-NEXT: # %bb.1: -; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fstp %st(0) ; X87_LIN-NEXT: fldz +; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: .LBB0_2: -; X87_LIN-NEXT: fstp %st(0) +; X87_LIN-NEXT: fstp %st(1) ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F @@ -769,12 +771,13 @@ ; X87_WIN-NEXT: fnstsw %ax ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_WIN-NEXT: sahf -; X87_WIN-NEXT: ja LBB2_2 +; X87_WIN-NEXT: jbe LBB2_2 ; X87_WIN-NEXT: # %bb.1: -; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fstp %st(0) ; X87_WIN-NEXT: fldz +; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: LBB2_2: -; X87_WIN-NEXT: fstp %st(0) +; X87_WIN-NEXT: fstp %st(1) ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F @@ -803,12 +806,13 @@ ; X87_LIN-NEXT: fnstsw %ax ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_LIN-NEXT: sahf -; X87_LIN-NEXT: ja .LBB2_2 +; X87_LIN-NEXT: jbe .LBB2_2 ; X87_LIN-NEXT: # %bb.1: -; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fstp %st(0) ; X87_LIN-NEXT: fldz +; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: .LBB2_2: -; X87_LIN-NEXT: fstp %st(0) +; X87_LIN-NEXT: fstp %st(1) ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F @@ -1326,12 +1330,13 @@ ; X87_WIN-NEXT: fnstsw %ax ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_WIN-NEXT: sahf -; X87_WIN-NEXT: ja LBB4_2 +; X87_WIN-NEXT: jbe LBB4_2 ; X87_WIN-NEXT: # %bb.1: -; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fstp %st(0) ; X87_WIN-NEXT: fldz +; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: LBB4_2: -; X87_WIN-NEXT: fstp %st(0) +; X87_WIN-NEXT: fstp %st(1) ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F @@ -1360,12 +1365,13 @@ ; X87_LIN-NEXT: fnstsw %ax ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_LIN-NEXT: sahf -; X87_LIN-NEXT: ja .LBB4_2 +; X87_LIN-NEXT: jbe .LBB4_2 ; X87_LIN-NEXT: # %bb.1: -; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fstp %st(0) ; X87_LIN-NEXT: fldz +; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: .LBB4_2: -; X87_LIN-NEXT: fstp %st(0) +; X87_LIN-NEXT: fstp %st(1) ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F Index: test/CodeGen/X86/scalar-int-to-fp.ll =================================================================== --- test/CodeGen/X86/scalar-int-to-fp.ll +++ test/CodeGen/X86/scalar-int-to-fp.ll @@ -372,11 +372,11 @@ ; SSE2_64-LABEL: u64_to_f: ; SSE2_64: # %bb.0: ; SSE2_64-NEXT: testq %rdi, %rdi -; SSE2_64-NEXT: js .LBB6_1 -; SSE2_64-NEXT: # %bb.2: +; SSE2_64-NEXT: js .LBB6_2 +; SSE2_64-NEXT: # %bb.1: ; SSE2_64-NEXT: cvtsi2ssq %rdi, %xmm0 ; SSE2_64-NEXT: retq -; SSE2_64-NEXT: .LBB6_1: +; SSE2_64-NEXT: .LBB6_2: ; SSE2_64-NEXT: movq %rdi, %rax ; SSE2_64-NEXT: shrq %rax ; SSE2_64-NEXT: andl $1, %edi Index: test/CodeGen/X86/scheduler-backtracking.ll =================================================================== --- test/CodeGen/X86/scheduler-backtracking.ll +++ test/CodeGen/X86/scheduler-backtracking.ll @@ -18,25 +18,25 @@ ; ILP-NEXT: pushq %r13 ; ILP-NEXT: pushq %r12 ; ILP-NEXT: pushq %rbx -; ILP-NEXT: movq %rcx, %r9 +; ILP-NEXT: movq %rcx, %r11 ; ILP-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; ILP-NEXT: xorl %eax, %eax ; ILP-NEXT: addq $1, %rsi ; ILP-NEXT: adcq $0, %rdx -; ILP-NEXT: adcq $0, %r9 +; ILP-NEXT: adcq $0, %r11 ; ILP-NEXT: adcq $0, %r8 ; ILP-NEXT: leal 1(%rsi,%rsi), %edi ; ILP-NEXT: movl $1, %ebp ; ILP-NEXT: xorl %r14d, %r14d ; ILP-NEXT: movl %edi, %ecx ; ILP-NEXT: shldq %cl, %rbp, %r14 -; ILP-NEXT: movl $1, %r11d -; ILP-NEXT: shlq %cl, %r11 +; ILP-NEXT: movl $1, %r13d +; ILP-NEXT: shlq %cl, %r13 ; ILP-NEXT: movb $-128, %r10b ; ILP-NEXT: subb %dil, %r10b -; ILP-NEXT: movq %r9, %r13 +; ILP-NEXT: movq %r11, %r9 ; ILP-NEXT: movl %r10d, %ecx -; ILP-NEXT: shlq %cl, %r13 +; ILP-NEXT: shlq %cl, %r9 ; ILP-NEXT: movl $1, %r12d ; ILP-NEXT: shrdq %cl, %rax, %r12 ; ILP-NEXT: xorl %r15d, %r15d @@ -46,42 +46,42 @@ ; ILP-NEXT: shrdq %cl, %rdx, %rbx ; ILP-NEXT: shrq %cl, %rdx ; ILP-NEXT: addb $-128, %cl -; ILP-NEXT: shrdq %cl, %r8, %r9 +; ILP-NEXT: shrdq %cl, %r8, %r11 ; ILP-NEXT: testb $64, %dil -; ILP-NEXT: cmovneq %r11, %r14 +; ILP-NEXT: cmovneq %r13, %r14 ; ILP-NEXT: cmoveq %rbx, %rdx ; ILP-NEXT: cmovneq %rax, %r15 -; ILP-NEXT: cmovneq %rax, %r11 +; ILP-NEXT: cmovneq %rax, %r13 ; ILP-NEXT: testb $64, %r10b ; ILP-NEXT: cmovneq %rax, %r12 -; ILP-NEXT: cmovneq %rax, %r13 +; ILP-NEXT: cmovneq %rax, %r9 ; ILP-NEXT: movl $1, %ebx ; ILP-NEXT: shlq %cl, %rbx -; ILP-NEXT: orl %edx, %r13d +; ILP-NEXT: orl %edx, %r9d ; ILP-NEXT: xorl %edx, %edx ; ILP-NEXT: movl $1, %ebp ; ILP-NEXT: shldq %cl, %rbp, %rdx ; ILP-NEXT: shrq %cl, %r8 ; ILP-NEXT: testb $64, %cl -; ILP-NEXT: cmoveq %r9, %r8 +; ILP-NEXT: cmoveq %r11, %r8 ; ILP-NEXT: cmovneq %rbx, %rdx ; ILP-NEXT: cmovneq %rax, %rbx ; ILP-NEXT: testb %dil, %dil ; ILP-NEXT: cmovsq %rax, %r14 -; ILP-NEXT: cmovsq %rax, %r11 -; ILP-NEXT: jns .LBB0_2 +; ILP-NEXT: cmovsq %rax, %r13 +; ILP-NEXT: js .LBB0_2 ; ILP-NEXT: # %bb.1: -; ILP-NEXT: movl %r8d, %r13d +; ILP-NEXT: movl %r9d, %r8d ; ILP-NEXT: .LBB0_2: -; ILP-NEXT: je .LBB0_4 +; ILP-NEXT: jne .LBB0_4 ; ILP-NEXT: # %bb.3: -; ILP-NEXT: movl %r13d, %esi +; ILP-NEXT: movl %esi, %r8d ; ILP-NEXT: .LBB0_4: ; ILP-NEXT: cmovnsq %r12, %rbx ; ILP-NEXT: cmoveq %rax, %rbx ; ILP-NEXT: cmovnsq %r15, %rdx ; ILP-NEXT: cmoveq %rax, %rdx -; ILP-NEXT: testb $1, %sil +; ILP-NEXT: testb $1, %r8b ; ILP-NEXT: cmovneq %rax, %rdx ; ILP-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; ILP-NEXT: movq %rdx, 24(%rax) @@ -89,8 +89,8 @@ ; ILP-NEXT: movq %rbx, 16(%rax) ; ILP-NEXT: cmovneq %rax, %r14 ; ILP-NEXT: movq %r14, 8(%rax) -; ILP-NEXT: cmovneq %rax, %r11 -; ILP-NEXT: movq %r11, (%rax) +; ILP-NEXT: cmovneq %rax, %r13 +; ILP-NEXT: movq %r13, (%rax) ; ILP-NEXT: popq %rbx ; ILP-NEXT: popq %r12 ; ILP-NEXT: popq %r13 @@ -157,13 +157,13 @@ ; HYBRID-NEXT: cmoveq %r9, %r8 ; HYBRID-NEXT: cmovneq %r10, %rbp ; HYBRID-NEXT: testb %dil, %dil -; HYBRID-NEXT: jns .LBB0_2 +; HYBRID-NEXT: js .LBB0_2 ; HYBRID-NEXT: # %bb.1: -; HYBRID-NEXT: movl %r8d, %r13d +; HYBRID-NEXT: movl %r13d, %r8d ; HYBRID-NEXT: .LBB0_2: -; HYBRID-NEXT: je .LBB0_4 +; HYBRID-NEXT: jne .LBB0_4 ; HYBRID-NEXT: # %bb.3: -; HYBRID-NEXT: movl %r13d, %esi +; HYBRID-NEXT: movl %esi, %r8d ; HYBRID-NEXT: .LBB0_4: ; HYBRID-NEXT: cmovsq %r10, %r15 ; HYBRID-NEXT: cmovnsq %r12, %rbp @@ -171,7 +171,7 @@ ; HYBRID-NEXT: cmovnsq %r14, %rdx ; HYBRID-NEXT: cmoveq %r10, %rdx ; HYBRID-NEXT: cmovsq %r10, %r11 -; HYBRID-NEXT: testb $1, %sil +; HYBRID-NEXT: testb $1, %r8b ; HYBRID-NEXT: cmovneq %rax, %rdx ; HYBRID-NEXT: movq %rdx, 24(%rax) ; HYBRID-NEXT: cmovneq %rax, %rbp @@ -246,13 +246,13 @@ ; BURR-NEXT: cmoveq %r9, %r8 ; BURR-NEXT: cmovneq %r10, %rbp ; BURR-NEXT: testb %dil, %dil -; BURR-NEXT: jns .LBB0_2 +; BURR-NEXT: js .LBB0_2 ; BURR-NEXT: # %bb.1: -; BURR-NEXT: movl %r8d, %r13d +; BURR-NEXT: movl %r13d, %r8d ; BURR-NEXT: .LBB0_2: -; BURR-NEXT: je .LBB0_4 +; BURR-NEXT: jne .LBB0_4 ; BURR-NEXT: # %bb.3: -; BURR-NEXT: movl %r13d, %esi +; BURR-NEXT: movl %esi, %r8d ; BURR-NEXT: .LBB0_4: ; BURR-NEXT: cmovsq %r10, %r15 ; BURR-NEXT: cmovnsq %r12, %rbp @@ -260,7 +260,7 @@ ; BURR-NEXT: cmovnsq %r14, %rdx ; BURR-NEXT: cmoveq %r10, %rdx ; BURR-NEXT: cmovsq %r10, %r11 -; BURR-NEXT: testb $1, %sil +; BURR-NEXT: testb $1, %r8b ; BURR-NEXT: cmovneq %rax, %rdx ; BURR-NEXT: movq %rdx, 24(%rax) ; BURR-NEXT: cmovneq %rax, %rbp @@ -294,9 +294,9 @@ ; SRC-NEXT: leal 1(%rsi,%rsi), %r11d ; SRC-NEXT: movb $-128, %r10b ; SRC-NEXT: subb %r11b, %r10b -; SRC-NEXT: movq %r9, %r12 +; SRC-NEXT: movq %r9, %r13 ; SRC-NEXT: movl %r10d, %ecx -; SRC-NEXT: shlq %cl, %r12 +; SRC-NEXT: shlq %cl, %r13 ; SRC-NEXT: movq %rsi, %rbp ; SRC-NEXT: movl %r11d, %ecx ; SRC-NEXT: shrdq %cl, %rdx, %rbp @@ -305,22 +305,22 @@ ; SRC-NEXT: movl $1, %edi ; SRC-NEXT: xorl %r14d, %r14d ; SRC-NEXT: shldq %cl, %rdi, %r14 -; SRC-NEXT: xorl %r13d, %r13d -; SRC-NEXT: shldq %cl, %r13, %r13 +; SRC-NEXT: xorl %r12d, %r12d +; SRC-NEXT: shldq %cl, %r12, %r12 ; SRC-NEXT: movl $1, %ebx ; SRC-NEXT: shlq %cl, %rbx ; SRC-NEXT: testb $64, %r11b ; SRC-NEXT: cmoveq %rbp, %rdx ; SRC-NEXT: cmovneq %rbx, %r14 ; SRC-NEXT: cmovneq %r15, %rbx -; SRC-NEXT: cmovneq %r15, %r13 +; SRC-NEXT: cmovneq %r15, %r12 ; SRC-NEXT: movl $1, %ebp ; SRC-NEXT: movl %r10d, %ecx ; SRC-NEXT: shrdq %cl, %r15, %rbp ; SRC-NEXT: testb $64, %r10b -; SRC-NEXT: cmovneq %r15, %r12 +; SRC-NEXT: cmovneq %r15, %r13 ; SRC-NEXT: cmovneq %r15, %rbp -; SRC-NEXT: orl %edx, %r12d +; SRC-NEXT: orl %edx, %r13d ; SRC-NEXT: movl %r11d, %ecx ; SRC-NEXT: addb $-128, %cl ; SRC-NEXT: shrdq %cl, %r8, %r9 @@ -333,21 +333,21 @@ ; SRC-NEXT: cmovneq %rdi, %rdx ; SRC-NEXT: cmovneq %r15, %rdi ; SRC-NEXT: testb %r11b, %r11b -; SRC-NEXT: jns .LBB0_2 +; SRC-NEXT: js .LBB0_2 ; SRC-NEXT: # %bb.1: -; SRC-NEXT: movl %r8d, %r12d +; SRC-NEXT: movl %r13d, %r8d ; SRC-NEXT: .LBB0_2: -; SRC-NEXT: je .LBB0_4 +; SRC-NEXT: jne .LBB0_4 ; SRC-NEXT: # %bb.3: -; SRC-NEXT: movl %r12d, %esi +; SRC-NEXT: movl %esi, %r8d ; SRC-NEXT: .LBB0_4: -; SRC-NEXT: cmovnsq %r13, %rdx +; SRC-NEXT: cmovnsq %r12, %rdx ; SRC-NEXT: cmoveq %r15, %rdx ; SRC-NEXT: cmovnsq %rbp, %rdi ; SRC-NEXT: cmoveq %r15, %rdi ; SRC-NEXT: cmovsq %r15, %r14 ; SRC-NEXT: cmovsq %r15, %rbx -; SRC-NEXT: testb $1, %sil +; SRC-NEXT: testb $1, %r8b ; SRC-NEXT: cmovneq %rax, %rbx ; SRC-NEXT: cmovneq %rax, %r14 ; SRC-NEXT: cmovneq %rax, %rdi @@ -408,15 +408,15 @@ ; LIN-NEXT: testb $64, %r11b ; LIN-NEXT: cmovneq %r15, %r9 ; LIN-NEXT: orl %edx, %r9d -; LIN-NEXT: jns .LBB0_2 +; LIN-NEXT: js .LBB0_2 ; LIN-NEXT: # %bb.1: -; LIN-NEXT: movl %r8d, %r9d +; LIN-NEXT: movl %r9d, %r8d ; LIN-NEXT: .LBB0_2: -; LIN-NEXT: je .LBB0_4 +; LIN-NEXT: jne .LBB0_4 ; LIN-NEXT: # %bb.3: -; LIN-NEXT: movl %r9d, %esi +; LIN-NEXT: movl %esi, %r8d ; LIN-NEXT: .LBB0_4: -; LIN-NEXT: testb $1, %sil +; LIN-NEXT: testb $1, %r8b ; LIN-NEXT: cmovneq %rax, %rbx ; LIN-NEXT: movq %rbx, (%rax) ; LIN-NEXT: xorl %edx, %edx Index: test/CodeGen/X86/select-mmx.ll =================================================================== --- test/CodeGen/X86/select-mmx.ll +++ test/CodeGen/X86/select-mmx.ll @@ -32,12 +32,12 @@ ; I32-NEXT: .cfi_def_cfa_register %ebp ; I32-NEXT: andl $-8, %esp ; I32-NEXT: subl $16, %esp -; I32-NEXT: movl 8(%ebp), %eax -; I32-NEXT: orl 12(%ebp), %eax -; I32-NEXT: movl $7, %eax -; I32-NEXT: je .LBB0_2 -; I32-NEXT: # %bb.1: +; I32-NEXT: movl 8(%ebp), %ecx ; I32-NEXT: xorl %eax, %eax +; I32-NEXT: orl 12(%ebp), %ecx +; I32-NEXT: jne .LBB0_2 +; I32-NEXT: # %bb.1: +; I32-NEXT: movl $7, %eax ; I32-NEXT: .LBB0_2: ; I32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; I32-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -87,11 +87,11 @@ ; I32-NEXT: subl $8, %esp ; I32-NEXT: movl 8(%ebp), %eax ; I32-NEXT: orl 12(%ebp), %eax -; I32-NEXT: je .LBB1_1 -; I32-NEXT: # %bb.2: +; I32-NEXT: je .LBB1_2 +; I32-NEXT: # %bb.1: ; I32-NEXT: leal 24(%ebp), %eax ; I32-NEXT: jmp .LBB1_3 -; I32-NEXT: .LBB1_1: +; I32-NEXT: .LBB1_2: ; I32-NEXT: leal 16(%ebp), %eax ; I32-NEXT: .LBB1_3: ; I32-NEXT: movq (%eax), %mm0 Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -31,12 +31,12 @@ ; MCU-LABEL: test1: ; MCU: # %bb.0: ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: jne .LBB0_1 -; MCU-NEXT: # %bb.2: +; MCU-NEXT: jne .LBB0_2 +; MCU-NEXT: # %bb.1: ; MCU-NEXT: addl $8, %edx ; MCU-NEXT: movl (%edx), %eax ; MCU-NEXT: retl -; MCU-NEXT: .LBB0_1: +; MCU-NEXT: .LBB0_2: ; MCU-NEXT: addl $8, %eax ; MCU-NEXT: movl (%eax), %eax ; MCU-NEXT: retl @@ -106,14 +106,14 @@ ; MCU-LABEL: test2: ; MCU: # %bb.0: # %entry ; MCU-NEXT: calll return_false -; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: testb $1, %al -; MCU-NEXT: jne .LBB1_2 +; MCU-NEXT: movl $-480, %eax # imm = 0xFE20 +; MCU-NEXT: je .LBB1_2 ; MCU-NEXT: # %bb.1: # %entry -; MCU-NEXT: movl $-480, %ecx # imm = 0xFE20 +; MCU-NEXT: xorl %eax, %eax ; MCU-NEXT: .LBB1_2: # %entry -; MCU-NEXT: shll $3, %ecx -; MCU-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; MCU-NEXT: shll $3, %eax +; MCU-NEXT: cmpl $32768, %eax # imm = 0x8000 ; MCU-NEXT: jge .LBB1_3 ; MCU-NEXT: # %bb.4: # %bb91 ; MCU-NEXT: xorl %eax, %eax @@ -222,11 +222,11 @@ ; CHECK-LABEL: test5: ; CHECK: ## %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne LBB4_2 +; CHECK-NEXT: je LBB4_2 ; CHECK-NEXT: ## %bb.1: -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: LBB4_2: -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] ; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-NEXT: movd %xmm0, (%rsi) ; CHECK-NEXT: retq @@ -273,12 +273,12 @@ ; CHECK-LABEL: test6: ; CHECK: ## %bb.0: ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je LBB5_1 -; CHECK-NEXT: ## %bb.2: +; CHECK-NEXT: je LBB5_2 +; CHECK-NEXT: ## %bb.1: ; CHECK-NEXT: movaps (%rsi), %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsi) ; CHECK-NEXT: retq -; CHECK-NEXT: LBB5_1: +; CHECK-NEXT: LBB5_2: ; CHECK-NEXT: movaps (%rdx), %xmm0 ; CHECK-NEXT: mulps %xmm0, %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsi) @@ -325,7 +325,6 @@ ; MCU: # %bb.0: ; MCU-NEXT: pushl %eax ; MCU-NEXT: flds 12(%edx) -; MCU-NEXT: fstps (%esp) # 4-byte Folded Spill ; MCU-NEXT: flds 8(%edx) ; MCU-NEXT: flds 4(%edx) ; MCU-NEXT: flds (%ecx) @@ -333,37 +332,35 @@ ; MCU-NEXT: flds 8(%ecx) ; MCU-NEXT: flds 12(%ecx) ; MCU-NEXT: fmul %st(0), %st(0) +; MCU-NEXT: fstps (%esp) # 4-byte Folded Spill +; MCU-NEXT: fmul %st(0), %st(0) ; MCU-NEXT: fxch %st(1) ; MCU-NEXT: fmul %st(0), %st(0) ; MCU-NEXT: fxch %st(2) ; MCU-NEXT: fmul %st(0), %st(0) -; MCU-NEXT: fxch %st(3) -; MCU-NEXT: fmul %st(0), %st(0) ; MCU-NEXT: testl %eax, %eax ; MCU-NEXT: flds (%edx) -; MCU-NEXT: je .LBB5_2 +; MCU-NEXT: jne .LBB5_2 ; MCU-NEXT: # %bb.1: +; MCU-NEXT: fstp %st(4) +; MCU-NEXT: fstp %st(4) +; MCU-NEXT: fstp %st(4) ; MCU-NEXT: fstp %st(1) -; MCU-NEXT: fstp %st(3) -; MCU-NEXT: fstp %st(1) -; MCU-NEXT: fstp %st(0) ; MCU-NEXT: flds (%esp) # 4-byte Folded Reload ; MCU-NEXT: fldz ; MCU-NEXT: fldz ; MCU-NEXT: fldz -; MCU-NEXT: fxch %st(1) +; MCU-NEXT: fxch %st(3) ; MCU-NEXT: fxch %st(6) -; MCU-NEXT: fxch %st(1) ; MCU-NEXT: fxch %st(5) -; MCU-NEXT: fxch %st(4) ; MCU-NEXT: fxch %st(1) -; MCU-NEXT: fxch %st(3) ; MCU-NEXT: fxch %st(2) +; MCU-NEXT: fxch %st(1) ; MCU-NEXT: .LBB5_2: -; MCU-NEXT: fstp %st(0) -; MCU-NEXT: fstp %st(5) -; MCU-NEXT: fstp %st(3) -; MCU-NEXT: fxch %st(2) +; MCU-NEXT: fstp %st(1) +; MCU-NEXT: fstp %st(1) +; MCU-NEXT: fstp %st(1) +; MCU-NEXT: fxch %st(3) ; MCU-NEXT: fstps 12(%edx) ; MCU-NEXT: fxch %st(1) ; MCU-NEXT: fstps 8(%edx) @@ -428,8 +425,8 @@ ; GENERIC-LABEL: test8: ; GENERIC: ## %bb.0: ; GENERIC-NEXT: testb $1, %dil -; GENERIC-NEXT: jne LBB7_1 -; GENERIC-NEXT: ## %bb.2: +; GENERIC-NEXT: jne LBB7_2 +; GENERIC-NEXT: ## %bb.1: ; GENERIC-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] @@ -440,7 +437,7 @@ ; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; GENERIC-NEXT: jmp LBB7_3 -; GENERIC-NEXT: LBB7_1: +; GENERIC-NEXT: LBB7_2: ; GENERIC-NEXT: movd %r9d, %xmm0 ; GENERIC-NEXT: movd %r8d, %xmm1 ; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] @@ -462,8 +459,8 @@ ; ATOM-LABEL: test8: ; ATOM: ## %bb.0: ; ATOM-NEXT: testb $1, %dil -; ATOM-NEXT: jne LBB7_1 -; ATOM-NEXT: ## %bb.2: +; ATOM-NEXT: jne LBB7_2 +; ATOM-NEXT: ## %bb.1: ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero @@ -473,7 +470,7 @@ ; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: jmp LBB7_3 -; ATOM-NEXT: LBB7_1: +; ATOM-NEXT: LBB7_2: ; ATOM-NEXT: movd %r9d, %xmm1 ; ATOM-NEXT: movd %r8d, %xmm2 ; ATOM-NEXT: movd %ecx, %xmm3 @@ -549,51 +546,51 @@ ; MCU-NEXT: pushl %edi ; MCU-NEXT: pushl %esi ; MCU-NEXT: testb $1, %al -; MCU-NEXT: jne .LBB7_1 -; MCU-NEXT: # %bb.2: +; MCU-NEXT: jne .LBB7_2 +; MCU-NEXT: # %bb.1: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax ; MCU-NEXT: movl (%eax), %eax -; MCU-NEXT: je .LBB7_5 -; MCU-NEXT: .LBB7_4: +; MCU-NEXT: je .LBB7_4 +; MCU-NEXT: .LBB7_5: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx ; MCU-NEXT: movl (%ecx), %ecx -; MCU-NEXT: je .LBB7_8 -; MCU-NEXT: .LBB7_7: +; MCU-NEXT: je .LBB7_7 +; MCU-NEXT: .LBB7_8: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi ; MCU-NEXT: movl (%esi), %esi -; MCU-NEXT: je .LBB7_11 -; MCU-NEXT: .LBB7_10: +; MCU-NEXT: je .LBB7_10 +; MCU-NEXT: .LBB7_11: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi ; MCU-NEXT: movl (%edi), %edi -; MCU-NEXT: je .LBB7_14 -; MCU-NEXT: .LBB7_13: +; MCU-NEXT: je .LBB7_13 +; MCU-NEXT: .LBB7_14: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx ; MCU-NEXT: movl (%ebx), %ebx -; MCU-NEXT: je .LBB7_17 -; MCU-NEXT: .LBB7_16: +; MCU-NEXT: je .LBB7_16 +; MCU-NEXT: .LBB7_17: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp ; MCU-NEXT: jmp .LBB7_18 -; MCU-NEXT: .LBB7_1: +; MCU-NEXT: .LBB7_2: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax ; MCU-NEXT: movl (%eax), %eax -; MCU-NEXT: jne .LBB7_4 -; MCU-NEXT: .LBB7_5: +; MCU-NEXT: jne .LBB7_5 +; MCU-NEXT: .LBB7_4: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx ; MCU-NEXT: movl (%ecx), %ecx -; MCU-NEXT: jne .LBB7_7 -; MCU-NEXT: .LBB7_8: +; MCU-NEXT: jne .LBB7_8 +; MCU-NEXT: .LBB7_7: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi ; MCU-NEXT: movl (%esi), %esi -; MCU-NEXT: jne .LBB7_10 -; MCU-NEXT: .LBB7_11: +; MCU-NEXT: jne .LBB7_11 +; MCU-NEXT: .LBB7_10: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi ; MCU-NEXT: movl (%edi), %edi -; MCU-NEXT: jne .LBB7_13 -; MCU-NEXT: .LBB7_14: +; MCU-NEXT: jne .LBB7_14 +; MCU-NEXT: .LBB7_13: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx ; MCU-NEXT: movl (%ebx), %ebx -; MCU-NEXT: jne .LBB7_16 -; MCU-NEXT: .LBB7_17: +; MCU-NEXT: jne .LBB7_17 +; MCU-NEXT: .LBB7_16: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp ; MCU-NEXT: .LBB7_18: ; MCU-NEXT: movl (%ebp), %ebp @@ -656,14 +653,13 @@ ; MCU-LABEL: test9: ; MCU: # %bb.0: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: jne .LBB8_1 -; MCU-NEXT: # %bb.2: ; MCU-NEXT: movl $-1, %eax ; MCU-NEXT: movl $-1, %edx -; MCU-NEXT: retl -; MCU-NEXT: .LBB8_1: +; MCU-NEXT: je .LBB8_2 +; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax ; MCU-NEXT: movl {{[0-9]+}}(%esp), %edx +; MCU-NEXT: .LBB8_2: ; MCU-NEXT: retl %cmp = icmp ne i64 %x, 0 %cond = select i1 %cmp, i64 %y, i64 -1 @@ -704,13 +700,14 @@ ; MCU-LABEL: test9a: ; MCU: # %bb.0: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: movl $-1, %eax -; MCU-NEXT: movl $-1, %edx ; MCU-NEXT: je .LBB9_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax ; MCU-NEXT: movl {{[0-9]+}}(%esp), %edx +; MCU-NEXT: retl ; MCU-NEXT: .LBB9_2: +; MCU-NEXT: movl $-1, %eax +; MCU-NEXT: movl $-1, %edx ; MCU-NEXT: retl %cmp = icmp eq i64 %x, 0 %cond = select i1 %cmp, i64 -1, i64 %y @@ -787,13 +784,14 @@ ; MCU-LABEL: test10: ; MCU: # %bb.0: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: movl $-1, %eax -; MCU-NEXT: movl $-1, %edx ; MCU-NEXT: je .LBB11_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: xorl %edx, %edx ; MCU-NEXT: movl $1, %eax +; MCU-NEXT: retl ; MCU-NEXT: .LBB11_2: +; MCU-NEXT: movl $-1, %eax +; MCU-NEXT: movl $-1, %edx ; MCU-NEXT: retl %cmp = icmp eq i64 %x, 0 %cond = select i1 %cmp, i64 -1, i64 1 @@ -825,14 +823,13 @@ ; MCU-LABEL: test11: ; MCU: # %bb.0: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: je .LBB12_1 -; MCU-NEXT: # %bb.2: ; MCU-NEXT: movl $-1, %eax ; MCU-NEXT: movl $-1, %edx -; MCU-NEXT: retl -; MCU-NEXT: .LBB12_1: +; MCU-NEXT: jne .LBB12_2 +; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax ; MCU-NEXT: movl {{[0-9]+}}(%esp), %edx +; MCU-NEXT: .LBB12_2: ; MCU-NEXT: retl %cmp = icmp eq i64 %x, 0 %cond = select i1 %cmp, i64 %y, i64 -1 @@ -864,13 +861,14 @@ ; MCU-LABEL: test11a: ; MCU: # %bb.0: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: movl $-1, %eax -; MCU-NEXT: movl $-1, %edx ; MCU-NEXT: jne .LBB13_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax ; MCU-NEXT: movl {{[0-9]+}}(%esp), %edx +; MCU-NEXT: retl ; MCU-NEXT: .LBB13_2: +; MCU-NEXT: movl $-1, %eax +; MCU-NEXT: movl $-1, %edx ; MCU-NEXT: retl %cmp = icmp ne i64 %x, 0 %cond = select i1 %cmp, i64 -1, i64 %y @@ -1090,11 +1088,11 @@ ; MCU-LABEL: test18: ; MCU: # %bb.0: ; MCU-NEXT: cmpl $15, %eax -; MCU-NEXT: jl .LBB19_2 +; MCU-NEXT: jge .LBB19_2 ; MCU-NEXT: # %bb.1: -; MCU-NEXT: movl %ecx, %edx +; MCU-NEXT: movl %edx, %ecx ; MCU-NEXT: .LBB19_2: -; MCU-NEXT: movl %edx, %eax +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %cmp = icmp slt i32 %x, 15 %sel = select i1 %cmp, i8 %a, i8 %b @@ -1133,33 +1131,18 @@ ; reproducer for pr29002 define void @clamp_i8(i32 %src, i8* %dst) { -; GENERIC-LABEL: clamp_i8: -; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpl $127, %edi -; GENERIC-NEXT: movl $127, %eax -; GENERIC-NEXT: cmovlel %edi, %eax -; GENERIC-NEXT: cmpl $-128, %eax -; GENERIC-NEXT: movb $-128, %cl -; GENERIC-NEXT: jl LBB21_2 -; GENERIC-NEXT: ## %bb.1: -; GENERIC-NEXT: movl %eax, %ecx -; GENERIC-NEXT: LBB21_2: -; GENERIC-NEXT: movb %cl, (%rsi) -; GENERIC-NEXT: retq -; -; ATOM-LABEL: clamp_i8: -; ATOM: ## %bb.0: -; ATOM-NEXT: cmpl $127, %edi -; ATOM-NEXT: movl $127, %eax -; ATOM-NEXT: movb $-128, %cl -; ATOM-NEXT: cmovlel %edi, %eax -; ATOM-NEXT: cmpl $-128, %eax -; ATOM-NEXT: jl LBB21_2 -; ATOM-NEXT: ## %bb.1: -; ATOM-NEXT: movl %eax, %ecx -; ATOM-NEXT: LBB21_2: -; ATOM-NEXT: movb %cl, (%rsi) -; ATOM-NEXT: retq +; CHECK-LABEL: clamp_i8: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl $127, %edi +; CHECK-NEXT: movl $127, %eax +; CHECK-NEXT: cmovlel %edi, %eax +; CHECK-NEXT: cmpl $-128, %eax +; CHECK-NEXT: jge LBB21_2 +; CHECK-NEXT: ## %bb.1: +; CHECK-NEXT: movb $-128, %al +; CHECK-NEXT: LBB21_2: +; CHECK-NEXT: movb %al, (%rsi) +; CHECK-NEXT: retq ; ; ATHLON-LABEL: clamp_i8: ; ATHLON: ## %bb.0: @@ -1169,30 +1152,31 @@ ; ATHLON-NEXT: movl $127, %ecx ; ATHLON-NEXT: cmovlel %edx, %ecx ; ATHLON-NEXT: cmpl $-128, %ecx -; ATHLON-NEXT: movb $-128, %dl -; ATHLON-NEXT: jl LBB21_2 +; ATHLON-NEXT: jge LBB21_2 ; ATHLON-NEXT: ## %bb.1: -; ATHLON-NEXT: movl %ecx, %edx +; ATHLON-NEXT: movb $-128, %cl ; ATHLON-NEXT: LBB21_2: -; ATHLON-NEXT: movb %dl, (%eax) +; ATHLON-NEXT: movb %cl, (%eax) ; ATHLON-NEXT: retl ; ; MCU-LABEL: clamp_i8: ; MCU: # %bb.0: ; MCU-NEXT: cmpl $127, %eax -; MCU-NEXT: movl $127, %ecx -; MCU-NEXT: jg .LBB21_2 -; MCU-NEXT: # %bb.1: -; MCU-NEXT: movl %eax, %ecx -; MCU-NEXT: .LBB21_2: -; MCU-NEXT: cmpl $-128, %ecx -; MCU-NEXT: movb $-128, %al -; MCU-NEXT: jl .LBB21_4 -; MCU-NEXT: # %bb.3: -; MCU-NEXT: movl %ecx, %eax +; MCU-NEXT: jg .LBB21_1 +; MCU-NEXT: # %bb.2: +; MCU-NEXT: cmpl $-128, %eax +; MCU-NEXT: jl .LBB21_3 ; MCU-NEXT: .LBB21_4: ; MCU-NEXT: movb %al, (%edx) ; MCU-NEXT: retl +; MCU-NEXT: .LBB21_1: +; MCU-NEXT: movl $127, %eax +; MCU-NEXT: cmpl $-128, %eax +; MCU-NEXT: jge .LBB21_4 +; MCU-NEXT: .LBB21_3: +; MCU-NEXT: movb $-128, %al +; MCU-NEXT: movb %al, (%edx) +; MCU-NEXT: retl %cmp = icmp sgt i32 %src, 127 %sel1 = select i1 %cmp, i32 127, i32 %src %cmp1 = icmp slt i32 %sel1, -128 @@ -1242,19 +1226,21 @@ ; MCU-LABEL: clamp: ; MCU: # %bb.0: ; MCU-NEXT: cmpl $32767, %eax # imm = 0x7FFF -; MCU-NEXT: movl $32767, %ecx # imm = 0x7FFF -; MCU-NEXT: jg .LBB22_2 -; MCU-NEXT: # %bb.1: -; MCU-NEXT: movl %eax, %ecx -; MCU-NEXT: .LBB22_2: -; MCU-NEXT: cmpl $-32768, %ecx # imm = 0x8000 -; MCU-NEXT: movl $32768, %eax # imm = 0x8000 -; MCU-NEXT: jl .LBB22_4 -; MCU-NEXT: # %bb.3: -; MCU-NEXT: movl %ecx, %eax +; MCU-NEXT: jg .LBB22_1 +; MCU-NEXT: # %bb.2: +; MCU-NEXT: cmpl $-32768, %eax # imm = 0x8000 +; MCU-NEXT: jl .LBB22_3 ; MCU-NEXT: .LBB22_4: ; MCU-NEXT: movw %ax, (%edx) ; MCU-NEXT: retl +; MCU-NEXT: .LBB22_1: +; MCU-NEXT: movl $32767, %eax # imm = 0x7FFF +; MCU-NEXT: cmpl $-32768, %eax # imm = 0x8000 +; MCU-NEXT: jge .LBB22_4 +; MCU-NEXT: .LBB22_3: +; MCU-NEXT: movl $32768, %eax # imm = 0x8000 +; MCU-NEXT: movw %ax, (%edx) +; MCU-NEXT: retl %cmp = icmp sgt i32 %src, 32767 %sel1 = select i1 %cmp, i32 32767, i32 %src %cmp1 = icmp slt i32 %sel1, -32768 @@ -1426,7 +1412,7 @@ ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %dl ; MCU-NEXT: je .LBB25_2 -; MCU-NEXT: # %bb.1: +; MCU-NEXT: # %bb.1: # %entry ; MCU-NEXT: xorl $43, %eax ; MCU-NEXT: .LBB25_2: # %entry ; MCU-NEXT: # kill: def $ax killed $ax killed $eax @@ -1496,7 +1482,7 @@ ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl ; MCU-NEXT: je .LBB27_2 -; MCU-NEXT: # %bb.1: +; MCU-NEXT: # %bb.1: # %entry ; MCU-NEXT: xorl %edx, %eax ; MCU-NEXT: .LBB27_2: # %entry ; MCU-NEXT: retl @@ -1565,7 +1551,7 @@ ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl ; MCU-NEXT: je .LBB29_2 -; MCU-NEXT: # %bb.1: +; MCU-NEXT: # %bb.1: # %entry ; MCU-NEXT: orl %edx, %eax ; MCU-NEXT: .LBB29_2: # %entry ; MCU-NEXT: retl @@ -1634,7 +1620,7 @@ ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl ; MCU-NEXT: je .LBB31_2 -; MCU-NEXT: # %bb.1: +; MCU-NEXT: # %bb.1: # %entry ; MCU-NEXT: orl %edx, %eax ; MCU-NEXT: .LBB31_2: # %entry ; MCU-NEXT: retl Index: test/CodeGen/X86/select_const.ll =================================================================== --- test/CodeGen/X86/select_const.ll +++ test/CodeGen/X86/select_const.ll @@ -380,10 +380,10 @@ ; CHECK-LABEL: sel_67_neg125: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpl $42, %edi -; CHECK-NEXT: movb $67, %al -; CHECK-NEXT: jg .LBB31_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movb $-125, %al +; CHECK-NEXT: jle .LBB31_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: movb $67, %al ; CHECK-NEXT: .LBB31_2: ; CHECK-NEXT: retq %cmp = icmp sgt i32 %x, 42 @@ -450,11 +450,11 @@ ; CHECK-LABEL: sel_constants_add_constant_vec: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB36_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB36_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [12,13,14,15] ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB36_1: +; CHECK-NEXT: .LBB36_2: ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4294967293,14,4,4] ; CHECK-NEXT: retq %sel = select i1 %cond, <4 x i32> , <4 x i32> @@ -466,11 +466,11 @@ ; CHECK-LABEL: sel_constants_fmul_constant_vec: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB37_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: jne .LBB37_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.1883E+2,3.4539999999999999E+1] ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB37_1: +; CHECK-NEXT: .LBB37_2: ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-2.0399999999999999E+1,3.768E+1] ; CHECK-NEXT: retq %sel = select i1 %cond, <2 x double> , <2 x double> Index: test/CodeGen/X86/sse-scalar-fp-arith.ll =================================================================== --- test/CodeGen/X86/sse-scalar-fp-arith.ll +++ test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -1306,25 +1306,23 @@ ; X86-SSE2-LABEL: add_ss_mask: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: jne .LBB70_1 -; X86-SSE2-NEXT: # %bb.2: -; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] -; X86-SSE2-NEXT: retl -; X86-SSE2-NEXT: .LBB70_1: +; X86-SSE2-NEXT: je .LBB70_2 +; X86-SSE2-NEXT: # %bb.1: ; X86-SSE2-NEXT: addss %xmm0, %xmm1 -; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X86-SSE2-NEXT: movaps %xmm1, %xmm2 +; X86-SSE2-NEXT: .LBB70_2: +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] ; X86-SSE2-NEXT: retl ; ; X86-SSE41-LABEL: add_ss_mask: ; X86-SSE41: # %bb.0: ; X86-SSE41-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-SSE41-NEXT: jne .LBB70_1 -; X86-SSE41-NEXT: # %bb.2: -; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] -; X86-SSE41-NEXT: retl -; X86-SSE41-NEXT: .LBB70_1: +; X86-SSE41-NEXT: je .LBB70_2 +; X86-SSE41-NEXT: # %bb.1: ; X86-SSE41-NEXT: addss %xmm0, %xmm1 -; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X86-SSE41-NEXT: movaps %xmm1, %xmm2 +; X86-SSE41-NEXT: .LBB70_2: +; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] ; X86-SSE41-NEXT: retl ; ; X86-AVX1-LABEL: add_ss_mask: @@ -1348,25 +1346,23 @@ ; X64-SSE2-LABEL: add_ss_mask: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: testb $1, %dil -; X64-SSE2-NEXT: jne .LBB70_1 -; X64-SSE2-NEXT: # %bb.2: -; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] -; X64-SSE2-NEXT: retq -; X64-SSE2-NEXT: .LBB70_1: +; X64-SSE2-NEXT: je .LBB70_2 +; X64-SSE2-NEXT: # %bb.1: ; X64-SSE2-NEXT: addss %xmm0, %xmm1 -; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X64-SSE2-NEXT: movaps %xmm1, %xmm2 +; X64-SSE2-NEXT: .LBB70_2: +; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] ; X64-SSE2-NEXT: retq ; ; X64-SSE41-LABEL: add_ss_mask: ; X64-SSE41: # %bb.0: ; X64-SSE41-NEXT: testb $1, %dil -; X64-SSE41-NEXT: jne .LBB70_1 -; X64-SSE41-NEXT: # %bb.2: -; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] -; X64-SSE41-NEXT: retq -; X64-SSE41-NEXT: .LBB70_1: +; X64-SSE41-NEXT: je .LBB70_2 +; X64-SSE41-NEXT: # %bb.1: ; X64-SSE41-NEXT: addss %xmm0, %xmm1 -; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; X64-SSE41-NEXT: movaps %xmm1, %xmm2 +; X64-SSE41-NEXT: .LBB70_2: +; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] ; X64-SSE41-NEXT: retq ; ; X64-AVX1-LABEL: add_ss_mask: @@ -1400,25 +1396,23 @@ ; X86-SSE2-LABEL: add_sd_mask: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: jne .LBB71_1 -; X86-SSE2-NEXT: # %bb.2: -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] -; X86-SSE2-NEXT: retl -; X86-SSE2-NEXT: .LBB71_1: +; X86-SSE2-NEXT: je .LBB71_2 +; X86-SSE2-NEXT: # %bb.1: ; X86-SSE2-NEXT: addsd %xmm0, %xmm1 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X86-SSE2-NEXT: movapd %xmm1, %xmm2 +; X86-SSE2-NEXT: .LBB71_2: +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; X86-SSE2-NEXT: retl ; ; X86-SSE41-LABEL: add_sd_mask: ; X86-SSE41: # %bb.0: ; X86-SSE41-NEXT: testb $1, {{[0-9]+}}(%esp) -; X86-SSE41-NEXT: jne .LBB71_1 -; X86-SSE41-NEXT: # %bb.2: -; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] -; X86-SSE41-NEXT: retl -; X86-SSE41-NEXT: .LBB71_1: +; X86-SSE41-NEXT: je .LBB71_2 +; X86-SSE41-NEXT: # %bb.1: ; X86-SSE41-NEXT: addsd %xmm0, %xmm1 -; X86-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X86-SSE41-NEXT: movapd %xmm1, %xmm2 +; X86-SSE41-NEXT: .LBB71_2: +; X86-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; X86-SSE41-NEXT: retl ; ; X86-AVX1-LABEL: add_sd_mask: @@ -1442,25 +1436,23 @@ ; X64-SSE2-LABEL: add_sd_mask: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: testb $1, %dil -; X64-SSE2-NEXT: jne .LBB71_1 -; X64-SSE2-NEXT: # %bb.2: -; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] -; X64-SSE2-NEXT: retq -; X64-SSE2-NEXT: .LBB71_1: +; X64-SSE2-NEXT: je .LBB71_2 +; X64-SSE2-NEXT: # %bb.1: ; X64-SSE2-NEXT: addsd %xmm0, %xmm1 -; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X64-SSE2-NEXT: movapd %xmm1, %xmm2 +; X64-SSE2-NEXT: .LBB71_2: +; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; X64-SSE2-NEXT: retq ; ; X64-SSE41-LABEL: add_sd_mask: ; X64-SSE41: # %bb.0: ; X64-SSE41-NEXT: testb $1, %dil -; X64-SSE41-NEXT: jne .LBB71_1 -; X64-SSE41-NEXT: # %bb.2: -; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] -; X64-SSE41-NEXT: retq -; X64-SSE41-NEXT: .LBB71_1: +; X64-SSE41-NEXT: je .LBB71_2 +; X64-SSE41-NEXT: # %bb.1: ; X64-SSE41-NEXT: addsd %xmm0, %xmm1 -; X64-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X64-SSE41-NEXT: movapd %xmm1, %xmm2 +; X64-SSE41-NEXT: .LBB71_2: +; X64-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; X64-SSE41-NEXT: retq ; ; X64-AVX1-LABEL: add_sd_mask: Index: test/CodeGen/X86/sse1.ll =================================================================== --- test/CodeGen/X86/sse1.ll +++ test/CodeGen/X86/sse1.ll @@ -46,37 +46,41 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X86-NEXT: xorps %xmm0, %xmm0 +; X86-NEXT: xorps %xmm1, %xmm1 ; X86-NEXT: je .LBB1_1 ; X86-NEXT: # %bb.2: # %entry -; X86-NEXT: xorps %xmm1, %xmm1 ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: jne .LBB1_5 -; X86-NEXT: .LBB1_4: -; X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-NEXT: xorps %xmm2, %xmm2 +; X86-NEXT: je .LBB1_3 +; X86-NEXT: .LBB1_4: # %entry ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: jne .LBB1_8 -; X86-NEXT: .LBB1_7: -; X86-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-NEXT: xorps %xmm3, %xmm3 +; X86-NEXT: je .LBB1_5 +; X86-NEXT: .LBB1_6: # %entry ; X86-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: je .LBB1_10 -; X86-NEXT: jmp .LBB1_11 -; X86-NEXT: .LBB1_1: +; X86-NEXT: je .LBB1_7 +; X86-NEXT: .LBB1_8: # %entry +; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X86-NEXT: retl +; X86-NEXT: .LBB1_1: # %entry ; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: je .LBB1_4 -; X86-NEXT: .LBB1_5: # %entry ; X86-NEXT: xorps %xmm2, %xmm2 +; X86-NEXT: jne .LBB1_4 +; X86-NEXT: .LBB1_3: # %entry +; X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: je .LBB1_7 -; X86-NEXT: .LBB1_8: # %entry ; X86-NEXT: xorps %xmm3, %xmm3 +; X86-NEXT: jne .LBB1_6 +; X86-NEXT: .LBB1_5: # %entry +; X86-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; X86-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: jne .LBB1_11 -; X86-NEXT: .LBB1_10: +; X86-NEXT: jne .LBB1_8 +; X86-NEXT: .LBB1_7: # %entry ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: .LBB1_11: # %entry ; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X86-NEXT: retl @@ -85,37 +89,41 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: testl %edx, %edx ; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: xorps %xmm1, %xmm1 ; X64-NEXT: je .LBB1_1 ; X64-NEXT: # %bb.2: # %entry -; X64-NEXT: xorps %xmm1, %xmm1 ; X64-NEXT: testl %ecx, %ecx -; X64-NEXT: jne .LBB1_5 -; X64-NEXT: .LBB1_4: -; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: je .LBB1_3 +; X64-NEXT: .LBB1_4: # %entry ; X64-NEXT: testl %r8d, %r8d -; X64-NEXT: jne .LBB1_8 -; X64-NEXT: .LBB1_7: -; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X64-NEXT: xorps %xmm3, %xmm3 +; X64-NEXT: je .LBB1_5 +; X64-NEXT: .LBB1_6: # %entry ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X64-NEXT: testl %esi, %esi -; X64-NEXT: je .LBB1_10 -; X64-NEXT: jmp .LBB1_11 -; X64-NEXT: .LBB1_1: +; X64-NEXT: je .LBB1_7 +; X64-NEXT: .LBB1_8: # %entry +; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-NEXT: retq +; X64-NEXT: .LBB1_1: # %entry ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: testl %ecx, %ecx -; X64-NEXT: je .LBB1_4 -; X64-NEXT: .LBB1_5: # %entry ; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: jne .LBB1_4 +; X64-NEXT: .LBB1_3: # %entry +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X64-NEXT: testl %r8d, %r8d -; X64-NEXT: je .LBB1_7 -; X64-NEXT: .LBB1_8: # %entry ; X64-NEXT: xorps %xmm3, %xmm3 +; X64-NEXT: jne .LBB1_6 +; X64-NEXT: .LBB1_5: # %entry +; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X64-NEXT: testl %esi, %esi -; X64-NEXT: jne .LBB1_11 -; X64-NEXT: .LBB1_10: +; X64-NEXT: jne .LBB1_8 +; X64-NEXT: .LBB1_7: # %entry ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: .LBB1_11: # %entry ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq Index: test/CodeGen/X86/uadd_sat.ll =================================================================== --- test/CodeGen/X86/uadd_sat.ll +++ test/CodeGen/X86/uadd_sat.ll @@ -51,28 +51,28 @@ define i4 @func3(i4 %x, i4 %y) { ; CHECK-LABEL: func3: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: shlb $4, %sil -; CHECK-NEXT: shlb $4, %dil -; CHECK-NEXT: addb %sil, %dil -; CHECK-NEXT: movb $-1, %al -; CHECK-NEXT: jb .LBB2_2 +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: addb %sil, %al +; CHECK-NEXT: jae .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movb $-1, %al ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: shrb $4, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq ; ; CHECK32-LABEL: func3: ; CHECK32: # %bb.0: -; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al -; CHECK32-NEXT: shlb $4, %al +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: shlb $4, %cl -; CHECK32-NEXT: addb %al, %cl -; CHECK32-NEXT: movb $-1, %al -; CHECK32-NEXT: jb .LBB2_2 +; CHECK32-NEXT: shlb $4, %al +; CHECK32-NEXT: addb %cl, %al +; CHECK32-NEXT: jae .LBB2_2 ; CHECK32-NEXT: # %bb.1: -; CHECK32-NEXT: movl %ecx, %eax +; CHECK32-NEXT: movb $-1, %al ; CHECK32-NEXT: .LBB2_2: ; CHECK32-NEXT: shrb $4, %al ; CHECK32-NEXT: retl Index: test/CodeGen/X86/uint64-to-float.ll =================================================================== --- test/CodeGen/X86/uint64-to-float.ll +++ test/CodeGen/X86/uint64-to-float.ll @@ -31,11 +31,11 @@ ; X64-LABEL: test: ; X64: # %bb.0: # %entry ; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: js .LBB0_1 -; X64-NEXT: # %bb.2: # %entry +; X64-NEXT: js .LBB0_2 +; X64-NEXT: # %bb.1: ; X64-NEXT: cvtsi2ssq %rdi, %xmm0 ; X64-NEXT: retq -; X64-NEXT: .LBB0_1: +; X64-NEXT: .LBB0_2: # %entry ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq %rax ; X64-NEXT: andl $1, %edi Index: test/CodeGen/X86/urem-seteq-optsize.ll =================================================================== --- test/CodeGen/X86/urem-seteq-optsize.ll +++ test/CodeGen/X86/urem-seteq-optsize.ll @@ -14,12 +14,12 @@ ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %ecx ; X86-NEXT: testl %edx, %edx -; X86-NEXT: je .LBB0_1 -; X86-NEXT: # %bb.2: +; X86-NEXT: je .LBB0_2 +; X86-NEXT: # %bb.1: ; X86-NEXT: pushl $-10 ; X86-NEXT: popl %eax ; X86-NEXT: retl -; X86-NEXT: .LBB0_1: +; X86-NEXT: .LBB0_2: ; X86-NEXT: pushl $42 ; X86-NEXT: popl %eax ; X86-NEXT: retl @@ -54,10 +54,10 @@ ; X86-NEXT: shrl $2, %edx ; X86-NEXT: leal (%edx,%edx,4), %eax ; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: movl $42, %eax -; X86-NEXT: je .LBB1_2 -; X86-NEXT: # %bb.1: ; X86-NEXT: movl $-10, %eax +; X86-NEXT: jne .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl $42, %eax ; X86-NEXT: .LBB1_2: ; X86-NEXT: retl ; Index: test/CodeGen/X86/vec_floor.ll =================================================================== --- test/CodeGen/X86/vec_floor.ll +++ test/CodeGen/X86/vec_floor.ll @@ -1391,25 +1391,28 @@ ; SSE41-LABEL: floor_maskz_ss: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: je LBB53_2 ; SSE41-NEXT: ## %bb.1: -; SSE41-NEXT: xorps %xmm2, %xmm2 -; SSE41-NEXT: roundss $9, %xmm0, %xmm2 +; SSE41-NEXT: roundss $9, %xmm0, %xmm0 +; SSE41-NEXT: jmp LBB53_3 ; SSE41-NEXT: LBB53_2: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] +; SSE41-NEXT: xorps %xmm0, %xmm0 +; SSE41-NEXT: LBB53_3: +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: floor_maskz_ss: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX-NEXT: je LBB53_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX-NEXT: retq ; AVX-NEXT: LBB53_2: -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX-NEXT: retq ; ; AVX512-LABEL: floor_maskz_ss: @@ -1469,25 +1472,28 @@ ; SSE41-LABEL: floor_maskz_sd: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: xorpd %xmm2, %xmm2 ; SSE41-NEXT: je LBB55_2 ; SSE41-NEXT: ## %bb.1: -; SSE41-NEXT: xorps %xmm2, %xmm2 -; SSE41-NEXT: roundsd $9, %xmm0, %xmm2 +; SSE41-NEXT: roundsd $9, %xmm0, %xmm0 +; SSE41-NEXT: jmp LBB55_3 ; SSE41-NEXT: LBB55_2: -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; SSE41-NEXT: xorpd %xmm0, %xmm0 +; SSE41-NEXT: LBB55_3: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: floor_maskz_sd: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; AVX-NEXT: je LBB55_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: retq ; AVX-NEXT: LBB55_2: -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX-NEXT: retq ; ; AVX512-LABEL: floor_maskz_sd: @@ -1546,28 +1552,25 @@ ; SSE41-LABEL: floor_maskz_ss_trunc: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: jne LBB57_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorps %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB57_3 -; SSE41-NEXT: LBB57_1: -; SSE41-NEXT: roundss $9, %xmm0, %xmm0 -; SSE41-NEXT: LBB57_3: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: je LBB57_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundss $9, %xmm0, %xmm2 +; SSE41-NEXT: LBB57_2: +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: floor_maskz_ss_trunc: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: jne LBB57_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; AVX-NEXT: retq -; AVX-NEXT: LBB57_1: -; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB57_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB57_2: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] ; AVX-NEXT: retq ; ; AVX512-LABEL: floor_maskz_ss_trunc: @@ -1625,28 +1628,25 @@ ; SSE41-LABEL: floor_maskz_sd_trunc: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: jne LBB59_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorpd %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB59_3 -; SSE41-NEXT: LBB59_1: -; SSE41-NEXT: roundsd $9, %xmm0, %xmm0 -; SSE41-NEXT: LBB59_3: -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE41-NEXT: xorpd %xmm2, %xmm2 +; SSE41-NEXT: je LBB59_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundsd $9, %xmm0, %xmm2 +; SSE41-NEXT: LBB59_2: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: floor_maskz_sd_trunc: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: jne LBB59_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] -; AVX-NEXT: retq -; AVX-NEXT: LBB59_1: -; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB59_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB59_2: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] ; AVX-NEXT: retq ; ; AVX512-LABEL: floor_maskz_sd_trunc: @@ -1723,14 +1723,13 @@ ; SSE41-NEXT: cmpeqps %xmm1, %xmm2 ; SSE41-NEXT: pextrb $0, %xmm2, %eax ; SSE41-NEXT: testb $1, %al -; SSE41-NEXT: jne LBB61_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorps %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB61_3 -; SSE41-NEXT: LBB61_1: -; SSE41-NEXT: roundss $9, %xmm0, %xmm0 -; SSE41-NEXT: LBB61_3: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: je LBB61_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundss $9, %xmm0, %xmm2 +; SSE41-NEXT: LBB61_2: +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -1739,14 +1738,12 @@ ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm2 ; AVX-NEXT: vpextrb $0, %xmm2, %eax ; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne LBB61_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; AVX-NEXT: retq -; AVX-NEXT: LBB61_1: -; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB61_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB61_2: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] ; AVX-NEXT: retq ; ; AVX512F-LABEL: floor_maskz_ss_mask8: @@ -1833,14 +1830,13 @@ ; SSE41-NEXT: cmpeqpd %xmm1, %xmm2 ; SSE41-NEXT: pextrb $0, %xmm2, %eax ; SSE41-NEXT: testb $1, %al -; SSE41-NEXT: jne LBB63_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorpd %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB63_3 -; SSE41-NEXT: LBB63_1: -; SSE41-NEXT: roundsd $9, %xmm0, %xmm0 -; SSE41-NEXT: LBB63_3: -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE41-NEXT: xorpd %xmm2, %xmm2 +; SSE41-NEXT: je LBB63_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundsd $9, %xmm0, %xmm2 +; SSE41-NEXT: LBB63_2: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -1849,14 +1845,12 @@ ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm2 ; AVX-NEXT: vpextrb $0, %xmm2, %eax ; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne LBB63_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] -; AVX-NEXT: retq -; AVX-NEXT: LBB63_1: -; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB63_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB63_2: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] ; AVX-NEXT: retq ; ; AVX512F-LABEL: floor_maskz_sd_mask8: @@ -2455,25 +2449,28 @@ ; SSE41-LABEL: ceil_maskz_ss: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: je LBB79_2 ; SSE41-NEXT: ## %bb.1: -; SSE41-NEXT: xorps %xmm2, %xmm2 -; SSE41-NEXT: roundss $10, %xmm0, %xmm2 +; SSE41-NEXT: roundss $10, %xmm0, %xmm0 +; SSE41-NEXT: jmp LBB79_3 ; SSE41-NEXT: LBB79_2: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] +; SSE41-NEXT: xorps %xmm0, %xmm0 +; SSE41-NEXT: LBB79_3: +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: ceil_maskz_ss: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX-NEXT: je LBB79_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX-NEXT: retq ; AVX-NEXT: LBB79_2: -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX-NEXT: retq ; ; AVX512-LABEL: ceil_maskz_ss: @@ -2533,25 +2530,28 @@ ; SSE41-LABEL: ceil_maskz_sd: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: xorpd %xmm2, %xmm2 ; SSE41-NEXT: je LBB81_2 ; SSE41-NEXT: ## %bb.1: -; SSE41-NEXT: xorps %xmm2, %xmm2 -; SSE41-NEXT: roundsd $10, %xmm0, %xmm2 +; SSE41-NEXT: roundsd $10, %xmm0, %xmm0 +; SSE41-NEXT: jmp LBB81_3 ; SSE41-NEXT: LBB81_2: -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; SSE41-NEXT: xorpd %xmm0, %xmm0 +; SSE41-NEXT: LBB81_3: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: ceil_maskz_sd: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; AVX-NEXT: je LBB81_2 ; AVX-NEXT: ## %bb.1: -; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: retq ; AVX-NEXT: LBB81_2: -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX-NEXT: retq ; ; AVX512-LABEL: ceil_maskz_sd: @@ -2610,28 +2610,25 @@ ; SSE41-LABEL: ceil_maskz_ss_trunc: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: jne LBB83_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorps %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB83_3 -; SSE41-NEXT: LBB83_1: -; SSE41-NEXT: roundss $10, %xmm0, %xmm0 -; SSE41-NEXT: LBB83_3: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: je LBB83_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundss $10, %xmm0, %xmm2 +; SSE41-NEXT: LBB83_2: +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: ceil_maskz_ss_trunc: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: jne LBB83_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; AVX-NEXT: retq -; AVX-NEXT: LBB83_1: -; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB83_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB83_2: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] ; AVX-NEXT: retq ; ; AVX512-LABEL: ceil_maskz_ss_trunc: @@ -2689,28 +2686,25 @@ ; SSE41-LABEL: ceil_maskz_sd_trunc: ; SSE41: ## %bb.0: ; SSE41-NEXT: testb $1, %dil -; SSE41-NEXT: jne LBB85_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorpd %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB85_3 -; SSE41-NEXT: LBB85_1: -; SSE41-NEXT: roundsd $10, %xmm0, %xmm0 -; SSE41-NEXT: LBB85_3: -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE41-NEXT: xorpd %xmm2, %xmm2 +; SSE41-NEXT: je LBB85_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundsd $10, %xmm0, %xmm2 +; SSE41-NEXT: LBB85_2: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: ceil_maskz_sd_trunc: ; AVX: ## %bb.0: ; AVX-NEXT: testb $1, %dil -; AVX-NEXT: jne LBB85_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] -; AVX-NEXT: retq -; AVX-NEXT: LBB85_1: -; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB85_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB85_2: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] ; AVX-NEXT: retq ; ; AVX512-LABEL: ceil_maskz_sd_trunc: @@ -2787,14 +2781,13 @@ ; SSE41-NEXT: cmpeqps %xmm1, %xmm2 ; SSE41-NEXT: pextrb $0, %xmm2, %eax ; SSE41-NEXT: testb $1, %al -; SSE41-NEXT: jne LBB87_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorps %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB87_3 -; SSE41-NEXT: LBB87_1: -; SSE41-NEXT: roundss $10, %xmm0, %xmm0 -; SSE41-NEXT: LBB87_3: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: je LBB87_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundss $10, %xmm0, %xmm2 +; SSE41-NEXT: LBB87_2: +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -2803,14 +2796,12 @@ ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm2 ; AVX-NEXT: vpextrb $0, %xmm2, %eax ; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne LBB87_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; AVX-NEXT: retq -; AVX-NEXT: LBB87_1: -; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB87_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB87_2: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] ; AVX-NEXT: retq ; ; AVX512F-LABEL: ceil_maskz_ss_mask8: @@ -2897,14 +2888,13 @@ ; SSE41-NEXT: cmpeqpd %xmm1, %xmm2 ; SSE41-NEXT: pextrb $0, %xmm2, %eax ; SSE41-NEXT: testb $1, %al -; SSE41-NEXT: jne LBB89_1 -; SSE41-NEXT: ## %bb.2: -; SSE41-NEXT: xorpd %xmm0, %xmm0 -; SSE41-NEXT: jmp LBB89_3 -; SSE41-NEXT: LBB89_1: -; SSE41-NEXT: roundsd $10, %xmm0, %xmm0 -; SSE41-NEXT: LBB89_3: -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE41-NEXT: xorpd %xmm2, %xmm2 +; SSE41-NEXT: je LBB89_2 +; SSE41-NEXT: ## %bb.1: +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: roundsd $10, %xmm0, %xmm2 +; SSE41-NEXT: LBB89_2: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -2913,14 +2903,12 @@ ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm2 ; AVX-NEXT: vpextrb $0, %xmm2, %eax ; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne LBB89_1 -; AVX-NEXT: ## %bb.2: -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] -; AVX-NEXT: retq -; AVX-NEXT: LBB89_1: -; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX-NEXT: je LBB89_2 +; AVX-NEXT: ## %bb.1: +; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2 +; AVX-NEXT: LBB89_2: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] ; AVX-NEXT: retq ; ; AVX512F-LABEL: ceil_maskz_sd_mask8: Index: test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- test/CodeGen/X86/vec_int_to_fp.ll +++ test/CodeGen/X86/vec_int_to_fp.ll @@ -1830,12 +1830,12 @@ ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB39_1 -; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: js .LBB39_2 +; SSE2-NEXT: # %bb.1: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE2-NEXT: jmp .LBB39_3 -; SSE2-NEXT: .LBB39_1: +; SSE2-NEXT: .LBB39_2: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -1847,13 +1847,13 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB39_4 -; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: js .LBB39_5 +; SSE2-NEXT: # %bb.4: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq -; SSE2-NEXT: .LBB39_4: +; SSE2-NEXT: .LBB39_5: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -1868,11 +1868,11 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: pextrq $1, %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB39_1 -; SSE41-NEXT: # %bb.2: +; SSE41-NEXT: js .LBB39_2 +; SSE41-NEXT: # %bb.1: ; SSE41-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE41-NEXT: jmp .LBB39_3 -; SSE41-NEXT: .LBB39_1: +; SSE41-NEXT: .LBB39_2: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -1882,13 +1882,13 @@ ; SSE41-NEXT: .LBB39_3: ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB39_4 -; SSE41-NEXT: # %bb.5: +; SSE41-NEXT: js .LBB39_5 +; SSE41-NEXT: # %bb.4: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; SSE41-NEXT: retq -; SSE41-NEXT: .LBB39_4: +; SSE41-NEXT: .LBB39_5: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -1903,11 +1903,11 @@ ; VEX: # %bb.0: ; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB39_1 -; VEX-NEXT: # %bb.2: +; VEX-NEXT: js .LBB39_2 +; VEX-NEXT: # %bb.1: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VEX-NEXT: jmp .LBB39_3 -; VEX-NEXT: .LBB39_1: +; VEX-NEXT: .LBB39_2: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax @@ -1917,11 +1917,11 @@ ; VEX-NEXT: .LBB39_3: ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB39_4 -; VEX-NEXT: # %bb.5: +; VEX-NEXT: js .LBB39_5 +; VEX-NEXT: # %bb.4: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 ; VEX-NEXT: jmp .LBB39_6 -; VEX-NEXT: .LBB39_4: +; VEX-NEXT: .LBB39_5: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax @@ -1931,11 +1931,13 @@ ; VEX-NEXT: .LBB39_6: ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VEX-NEXT: js .LBB39_8 ; VEX-NEXT: # %bb.7: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: retq ; VEX-NEXT: .LBB39_8: +; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] ; VEX-NEXT: retq ; @@ -1984,12 +1986,12 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB40_1 -; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: js .LBB40_2 +; SSE2-NEXT: # %bb.1: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE2-NEXT: jmp .LBB40_3 -; SSE2-NEXT: .LBB40_1: +; SSE2-NEXT: .LBB40_2: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2000,12 +2002,12 @@ ; SSE2-NEXT: .LBB40_3: ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB40_4 -; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: js .LBB40_5 +; SSE2-NEXT: # %bb.4: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE2-NEXT: jmp .LBB40_6 -; SSE2-NEXT: .LBB40_4: +; SSE2-NEXT: .LBB40_5: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2023,12 +2025,12 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB40_1 -; SSE41-NEXT: # %bb.2: +; SSE41-NEXT: js .LBB40_2 +; SSE41-NEXT: # %bb.1: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE41-NEXT: jmp .LBB40_3 -; SSE41-NEXT: .LBB40_1: +; SSE41-NEXT: .LBB40_2: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2039,13 +2041,13 @@ ; SSE41-NEXT: .LBB40_3: ; SSE41-NEXT: pextrq $1, %xmm1, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB40_4 -; SSE41-NEXT: # %bb.5: +; SSE41-NEXT: js .LBB40_5 +; SSE41-NEXT: # %bb.4: ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; SSE41-NEXT: retq -; SSE41-NEXT: .LBB40_4: +; SSE41-NEXT: .LBB40_5: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2060,11 +2062,11 @@ ; VEX: # %bb.0: ; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB40_1 -; VEX-NEXT: # %bb.2: +; VEX-NEXT: js .LBB40_2 +; VEX-NEXT: # %bb.1: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VEX-NEXT: jmp .LBB40_3 -; VEX-NEXT: .LBB40_1: +; VEX-NEXT: .LBB40_2: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax @@ -2074,12 +2076,12 @@ ; VEX-NEXT: .LBB40_3: ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB40_4 -; VEX-NEXT: # %bb.5: +; VEX-NEXT: js .LBB40_5 +; VEX-NEXT: # %bb.4: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq -; VEX-NEXT: .LBB40_4: +; VEX-NEXT: .LBB40_5: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax @@ -2131,12 +2133,12 @@ ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB41_1 -; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: js .LBB41_2 +; SSE2-NEXT: # %bb.1: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE2-NEXT: jmp .LBB41_3 -; SSE2-NEXT: .LBB41_1: +; SSE2-NEXT: .LBB41_2: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2148,12 +2150,12 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB41_4 -; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: js .LBB41_5 +; SSE2-NEXT: # %bb.4: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE2-NEXT: jmp .LBB41_6 -; SSE2-NEXT: .LBB41_4: +; SSE2-NEXT: .LBB41_5: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2164,12 +2166,14 @@ ; SSE2-NEXT: .LBB41_6: ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: js .LBB41_8 ; SSE2-NEXT: # %bb.7: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE2-NEXT: retq ; SSE2-NEXT: .LBB41_8: +; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] ; SSE2-NEXT: retq ; @@ -2177,11 +2181,11 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: pextrq $1, %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB41_1 -; SSE41-NEXT: # %bb.2: +; SSE41-NEXT: js .LBB41_2 +; SSE41-NEXT: # %bb.1: ; SSE41-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE41-NEXT: jmp .LBB41_3 -; SSE41-NEXT: .LBB41_1: +; SSE41-NEXT: .LBB41_2: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2191,12 +2195,12 @@ ; SSE41-NEXT: .LBB41_3: ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB41_4 -; SSE41-NEXT: # %bb.5: +; SSE41-NEXT: js .LBB41_5 +; SSE41-NEXT: # %bb.4: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE41-NEXT: jmp .LBB41_6 -; SSE41-NEXT: .LBB41_4: +; SSE41-NEXT: .LBB41_5: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2207,12 +2211,14 @@ ; SSE41-NEXT: .LBB41_6: ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: js .LBB41_8 ; SSE41-NEXT: # %bb.7: ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE41-NEXT: retq ; SSE41-NEXT: .LBB41_8: +; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] ; SSE41-NEXT: retq ; @@ -2220,11 +2226,11 @@ ; VEX: # %bb.0: ; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB41_1 -; VEX-NEXT: # %bb.2: +; VEX-NEXT: js .LBB41_2 +; VEX-NEXT: # %bb.1: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VEX-NEXT: jmp .LBB41_3 -; VEX-NEXT: .LBB41_1: +; VEX-NEXT: .LBB41_2: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax @@ -2234,11 +2240,11 @@ ; VEX-NEXT: .LBB41_3: ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB41_4 -; VEX-NEXT: # %bb.5: +; VEX-NEXT: js .LBB41_5 +; VEX-NEXT: # %bb.4: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 ; VEX-NEXT: jmp .LBB41_6 -; VEX-NEXT: .LBB41_4: +; VEX-NEXT: .LBB41_5: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax @@ -2248,11 +2254,13 @@ ; VEX-NEXT: .LBB41_6: ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VEX-NEXT: js .LBB41_8 ; VEX-NEXT: # %bb.7: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 +; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: retq ; VEX-NEXT: .LBB41_8: +; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] ; VEX-NEXT: retq ; @@ -2514,11 +2522,11 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB47_1 -; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: js .LBB47_2 +; SSE2-NEXT: # %bb.1: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE2-NEXT: jmp .LBB47_3 -; SSE2-NEXT: .LBB47_1: +; SSE2-NEXT: .LBB47_2: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2529,11 +2537,11 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB47_4 -; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: js .LBB47_5 +; SSE2-NEXT: # %bb.4: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE2-NEXT: jmp .LBB47_6 -; SSE2-NEXT: .LBB47_4: +; SSE2-NEXT: .LBB47_5: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2543,12 +2551,12 @@ ; SSE2-NEXT: .LBB47_6: ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB47_7 -; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: js .LBB47_8 +; SSE2-NEXT: # %bb.7: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE2-NEXT: jmp .LBB47_9 -; SSE2-NEXT: .LBB47_7: +; SSE2-NEXT: .LBB47_8: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2561,12 +2569,12 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB47_10 -; SSE2-NEXT: # %bb.11: +; SSE2-NEXT: js .LBB47_11 +; SSE2-NEXT: # %bb.10: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE2-NEXT: jmp .LBB47_12 -; SSE2-NEXT: .LBB47_10: +; SSE2-NEXT: .LBB47_11: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -2584,11 +2592,11 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: pextrq $1, %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB47_1 -; SSE41-NEXT: # %bb.2: +; SSE41-NEXT: js .LBB47_2 +; SSE41-NEXT: # %bb.1: ; SSE41-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE41-NEXT: jmp .LBB47_3 -; SSE41-NEXT: .LBB47_1: +; SSE41-NEXT: .LBB47_2: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2598,12 +2606,12 @@ ; SSE41-NEXT: .LBB47_3: ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB47_4 -; SSE41-NEXT: # %bb.5: +; SSE41-NEXT: js .LBB47_5 +; SSE41-NEXT: # %bb.4: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE41-NEXT: jmp .LBB47_6 -; SSE41-NEXT: .LBB47_4: +; SSE41-NEXT: .LBB47_5: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2615,12 +2623,12 @@ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] ; SSE41-NEXT: movq %xmm1, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB47_7 -; SSE41-NEXT: # %bb.8: +; SSE41-NEXT: js .LBB47_8 +; SSE41-NEXT: # %bb.7: ; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE41-NEXT: jmp .LBB47_9 -; SSE41-NEXT: .LBB47_7: +; SSE41-NEXT: .LBB47_8: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2632,13 +2640,13 @@ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; SSE41-NEXT: pextrq $1, %xmm1, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB47_10 -; SSE41-NEXT: # %bb.11: +; SSE41-NEXT: js .LBB47_11 +; SSE41-NEXT: # %bb.10: ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; SSE41-NEXT: retq -; SSE41-NEXT: .LBB47_10: +; SSE41-NEXT: .LBB47_11: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -2653,11 +2661,11 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB47_1 -; AVX1-NEXT: # %bb.2: +; AVX1-NEXT: js .LBB47_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX1-NEXT: jmp .LBB47_3 -; AVX1-NEXT: .LBB47_1: +; AVX1-NEXT: .LBB47_2: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -2667,11 +2675,11 @@ ; AVX1-NEXT: .LBB47_3: ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB47_4 -; AVX1-NEXT: # %bb.5: +; AVX1-NEXT: js .LBB47_5 +; AVX1-NEXT: # %bb.4: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX1-NEXT: jmp .LBB47_6 -; AVX1-NEXT: .LBB47_4: +; AVX1-NEXT: .LBB47_5: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -2683,11 +2691,11 @@ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB47_7 -; AVX1-NEXT: # %bb.8: +; AVX1-NEXT: js .LBB47_8 +; AVX1-NEXT: # %bb.7: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX1-NEXT: jmp .LBB47_9 -; AVX1-NEXT: .LBB47_7: +; AVX1-NEXT: .LBB47_8: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -2698,13 +2706,13 @@ ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB47_10 -; AVX1-NEXT: # %bb.11: +; AVX1-NEXT: js .LBB47_11 +; AVX1-NEXT: # %bb.10: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq -; AVX1-NEXT: .LBB47_10: +; AVX1-NEXT: .LBB47_11: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -2719,11 +2727,11 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB47_1 -; AVX2-NEXT: # %bb.2: +; AVX2-NEXT: js .LBB47_2 +; AVX2-NEXT: # %bb.1: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX2-NEXT: jmp .LBB47_3 -; AVX2-NEXT: .LBB47_1: +; AVX2-NEXT: .LBB47_2: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -2733,11 +2741,11 @@ ; AVX2-NEXT: .LBB47_3: ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB47_4 -; AVX2-NEXT: # %bb.5: +; AVX2-NEXT: js .LBB47_5 +; AVX2-NEXT: # %bb.4: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX2-NEXT: jmp .LBB47_6 -; AVX2-NEXT: .LBB47_4: +; AVX2-NEXT: .LBB47_5: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -2749,11 +2757,11 @@ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB47_7 -; AVX2-NEXT: # %bb.8: +; AVX2-NEXT: js .LBB47_8 +; AVX2-NEXT: # %bb.7: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX2-NEXT: jmp .LBB47_9 -; AVX2-NEXT: .LBB47_7: +; AVX2-NEXT: .LBB47_8: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -2764,13 +2772,13 @@ ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB47_10 -; AVX2-NEXT: # %bb.11: +; AVX2-NEXT: js .LBB47_11 +; AVX2-NEXT: # %bb.10: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq -; AVX2-NEXT: .LBB47_10: +; AVX2-NEXT: .LBB47_11: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -4445,11 +4453,11 @@ ; SSE2-NEXT: movdqa 16(%rdi), %xmm0 ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB76_1 -; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: js .LBB76_2 +; SSE2-NEXT: # %bb.1: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE2-NEXT: jmp .LBB76_3 -; SSE2-NEXT: .LBB76_1: +; SSE2-NEXT: .LBB76_2: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4460,11 +4468,11 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB76_4 -; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: js .LBB76_5 +; SSE2-NEXT: # %bb.4: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE2-NEXT: jmp .LBB76_6 -; SSE2-NEXT: .LBB76_4: +; SSE2-NEXT: .LBB76_5: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4474,12 +4482,12 @@ ; SSE2-NEXT: .LBB76_6: ; SSE2-NEXT: movq %xmm2, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB76_7 -; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: js .LBB76_8 +; SSE2-NEXT: # %bb.7: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE2-NEXT: jmp .LBB76_9 -; SSE2-NEXT: .LBB76_7: +; SSE2-NEXT: .LBB76_8: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4492,12 +4500,12 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; SSE2-NEXT: movq %xmm2, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB76_10 -; SSE2-NEXT: # %bb.11: +; SSE2-NEXT: js .LBB76_11 +; SSE2-NEXT: # %bb.10: ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE2-NEXT: jmp .LBB76_12 -; SSE2-NEXT: .LBB76_10: +; SSE2-NEXT: .LBB76_11: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4516,11 +4524,11 @@ ; SSE41-NEXT: movdqa 16(%rdi), %xmm1 ; SSE41-NEXT: pextrq $1, %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB76_1 -; SSE41-NEXT: # %bb.2: +; SSE41-NEXT: js .LBB76_2 +; SSE41-NEXT: # %bb.1: ; SSE41-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE41-NEXT: jmp .LBB76_3 -; SSE41-NEXT: .LBB76_1: +; SSE41-NEXT: .LBB76_2: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -4530,12 +4538,12 @@ ; SSE41-NEXT: .LBB76_3: ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB76_4 -; SSE41-NEXT: # %bb.5: +; SSE41-NEXT: js .LBB76_5 +; SSE41-NEXT: # %bb.4: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE41-NEXT: jmp .LBB76_6 -; SSE41-NEXT: .LBB76_4: +; SSE41-NEXT: .LBB76_5: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -4547,12 +4555,12 @@ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] ; SSE41-NEXT: movq %xmm1, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB76_7 -; SSE41-NEXT: # %bb.8: +; SSE41-NEXT: js .LBB76_8 +; SSE41-NEXT: # %bb.7: ; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE41-NEXT: jmp .LBB76_9 -; SSE41-NEXT: .LBB76_7: +; SSE41-NEXT: .LBB76_8: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -4564,13 +4572,13 @@ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; SSE41-NEXT: pextrq $1, %xmm1, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB76_10 -; SSE41-NEXT: # %bb.11: +; SSE41-NEXT: js .LBB76_11 +; SSE41-NEXT: # %bb.10: ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; SSE41-NEXT: retq -; SSE41-NEXT: .LBB76_10: +; SSE41-NEXT: .LBB76_11: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -4586,11 +4594,11 @@ ; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB76_1 -; AVX1-NEXT: # %bb.2: +; AVX1-NEXT: js .LBB76_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX1-NEXT: jmp .LBB76_3 -; AVX1-NEXT: .LBB76_1: +; AVX1-NEXT: .LBB76_2: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -4600,11 +4608,11 @@ ; AVX1-NEXT: .LBB76_3: ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB76_4 -; AVX1-NEXT: # %bb.5: +; AVX1-NEXT: js .LBB76_5 +; AVX1-NEXT: # %bb.4: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX1-NEXT: jmp .LBB76_6 -; AVX1-NEXT: .LBB76_4: +; AVX1-NEXT: .LBB76_5: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -4616,11 +4624,11 @@ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB76_7 -; AVX1-NEXT: # %bb.8: +; AVX1-NEXT: js .LBB76_8 +; AVX1-NEXT: # %bb.7: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX1-NEXT: jmp .LBB76_9 -; AVX1-NEXT: .LBB76_7: +; AVX1-NEXT: .LBB76_8: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -4631,13 +4639,13 @@ ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB76_10 -; AVX1-NEXT: # %bb.11: +; AVX1-NEXT: js .LBB76_11 +; AVX1-NEXT: # %bb.10: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq -; AVX1-NEXT: .LBB76_10: +; AVX1-NEXT: .LBB76_11: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -4653,11 +4661,11 @@ ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB76_1 -; AVX2-NEXT: # %bb.2: +; AVX2-NEXT: js .LBB76_2 +; AVX2-NEXT: # %bb.1: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX2-NEXT: jmp .LBB76_3 -; AVX2-NEXT: .LBB76_1: +; AVX2-NEXT: .LBB76_2: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -4667,11 +4675,11 @@ ; AVX2-NEXT: .LBB76_3: ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB76_4 -; AVX2-NEXT: # %bb.5: +; AVX2-NEXT: js .LBB76_5 +; AVX2-NEXT: # %bb.4: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX2-NEXT: jmp .LBB76_6 -; AVX2-NEXT: .LBB76_4: +; AVX2-NEXT: .LBB76_5: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -4683,11 +4691,11 @@ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB76_7 -; AVX2-NEXT: # %bb.8: +; AVX2-NEXT: js .LBB76_8 +; AVX2-NEXT: # %bb.7: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX2-NEXT: jmp .LBB76_9 -; AVX2-NEXT: .LBB76_7: +; AVX2-NEXT: .LBB76_8: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -4698,13 +4706,13 @@ ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB76_10 -; AVX2-NEXT: # %bb.11: +; AVX2-NEXT: js .LBB76_11 +; AVX2-NEXT: # %bb.10: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq -; AVX2-NEXT: .LBB76_10: +; AVX2-NEXT: .LBB76_11: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -4905,11 +4913,11 @@ ; SSE2-NEXT: movdqa 48(%rdi), %xmm1 ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_1 -; SSE2-NEXT: # %bb.2: +; SSE2-NEXT: js .LBB80_2 +; SSE2-NEXT: # %bb.1: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE2-NEXT: jmp .LBB80_3 -; SSE2-NEXT: .LBB80_1: +; SSE2-NEXT: .LBB80_2: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4920,11 +4928,11 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_4 -; SSE2-NEXT: # %bb.5: +; SSE2-NEXT: js .LBB80_5 +; SSE2-NEXT: # %bb.4: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm4 ; SSE2-NEXT: jmp .LBB80_6 -; SSE2-NEXT: .LBB80_4: +; SSE2-NEXT: .LBB80_5: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4934,12 +4942,12 @@ ; SSE2-NEXT: .LBB80_6: ; SSE2-NEXT: movq %xmm5, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_7 -; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: js .LBB80_8 +; SSE2-NEXT: # %bb.7: ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE2-NEXT: jmp .LBB80_9 -; SSE2-NEXT: .LBB80_7: +; SSE2-NEXT: .LBB80_8: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4951,11 +4959,11 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1] ; SSE2-NEXT: movq %xmm5, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_10 -; SSE2-NEXT: # %bb.11: +; SSE2-NEXT: js .LBB80_11 +; SSE2-NEXT: # %bb.10: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm6 ; SSE2-NEXT: jmp .LBB80_12 -; SSE2-NEXT: .LBB80_10: +; SSE2-NEXT: .LBB80_11: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4965,12 +4973,12 @@ ; SSE2-NEXT: .LBB80_12: ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_13 -; SSE2-NEXT: # %bb.14: +; SSE2-NEXT: js .LBB80_14 +; SSE2-NEXT: # %bb.13: ; SSE2-NEXT: xorps %xmm5, %xmm5 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm5 ; SSE2-NEXT: jmp .LBB80_15 -; SSE2-NEXT: .LBB80_13: +; SSE2-NEXT: .LBB80_14: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4982,11 +4990,11 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_16 -; SSE2-NEXT: # %bb.17: +; SSE2-NEXT: js .LBB80_17 +; SSE2-NEXT: # %bb.16: ; SSE2-NEXT: cvtsi2ssq %rax, %xmm7 ; SSE2-NEXT: jmp .LBB80_18 -; SSE2-NEXT: .LBB80_16: +; SSE2-NEXT: .LBB80_17: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -4998,12 +5006,12 @@ ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] ; SSE2-NEXT: movq %xmm2, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_19 -; SSE2-NEXT: # %bb.20: +; SSE2-NEXT: js .LBB80_20 +; SSE2-NEXT: # %bb.19: ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE2-NEXT: jmp .LBB80_21 -; SSE2-NEXT: .LBB80_19: +; SSE2-NEXT: .LBB80_20: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -5017,12 +5025,12 @@ ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; SSE2-NEXT: movq %xmm2, %rax ; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: js .LBB80_22 -; SSE2-NEXT: # %bb.23: +; SSE2-NEXT: js .LBB80_23 +; SSE2-NEXT: # %bb.22: ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE2-NEXT: jmp .LBB80_24 -; SSE2-NEXT: .LBB80_22: +; SSE2-NEXT: .LBB80_23: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax @@ -5043,11 +5051,11 @@ ; SSE41-NEXT: movdqa 48(%rdi), %xmm2 ; SSE41-NEXT: pextrq $1, %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_1 -; SSE41-NEXT: # %bb.2: +; SSE41-NEXT: js .LBB80_2 +; SSE41-NEXT: # %bb.1: ; SSE41-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE41-NEXT: jmp .LBB80_3 -; SSE41-NEXT: .LBB80_1: +; SSE41-NEXT: .LBB80_2: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5057,12 +5065,12 @@ ; SSE41-NEXT: .LBB80_3: ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_4 -; SSE41-NEXT: # %bb.5: +; SSE41-NEXT: js .LBB80_5 +; SSE41-NEXT: # %bb.4: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE41-NEXT: jmp .LBB80_6 -; SSE41-NEXT: .LBB80_4: +; SSE41-NEXT: .LBB80_5: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5073,11 +5081,11 @@ ; SSE41-NEXT: .LBB80_6: ; SSE41-NEXT: movq %xmm4, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_7 -; SSE41-NEXT: # %bb.8: +; SSE41-NEXT: js .LBB80_8 +; SSE41-NEXT: # %bb.7: ; SSE41-NEXT: cvtsi2ssq %rax, %xmm5 ; SSE41-NEXT: jmp .LBB80_9 -; SSE41-NEXT: .LBB80_7: +; SSE41-NEXT: .LBB80_8: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5087,12 +5095,12 @@ ; SSE41-NEXT: .LBB80_9: ; SSE41-NEXT: pextrq $1, %xmm4, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_10 -; SSE41-NEXT: # %bb.11: +; SSE41-NEXT: js .LBB80_11 +; SSE41-NEXT: # %bb.10: ; SSE41-NEXT: xorps %xmm4, %xmm4 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm4 ; SSE41-NEXT: jmp .LBB80_12 -; SSE41-NEXT: .LBB80_10: +; SSE41-NEXT: .LBB80_11: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5103,11 +5111,11 @@ ; SSE41-NEXT: .LBB80_12: ; SSE41-NEXT: pextrq $1, %xmm1, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_13 -; SSE41-NEXT: # %bb.14: +; SSE41-NEXT: js .LBB80_14 +; SSE41-NEXT: # %bb.13: ; SSE41-NEXT: cvtsi2ssq %rax, %xmm6 ; SSE41-NEXT: jmp .LBB80_15 -; SSE41-NEXT: .LBB80_13: +; SSE41-NEXT: .LBB80_14: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5118,12 +5126,12 @@ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] ; SSE41-NEXT: movq %xmm1, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_16 -; SSE41-NEXT: # %bb.17: +; SSE41-NEXT: js .LBB80_17 +; SSE41-NEXT: # %bb.16: ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE41-NEXT: jmp .LBB80_18 -; SSE41-NEXT: .LBB80_16: +; SSE41-NEXT: .LBB80_17: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5136,12 +5144,12 @@ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm5[0],xmm0[3] ; SSE41-NEXT: movq %xmm2, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_19 -; SSE41-NEXT: # %bb.20: +; SSE41-NEXT: js .LBB80_20 +; SSE41-NEXT: # %bb.19: ; SSE41-NEXT: xorps %xmm3, %xmm3 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE41-NEXT: jmp .LBB80_21 -; SSE41-NEXT: .LBB80_19: +; SSE41-NEXT: .LBB80_20: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5154,13 +5162,13 @@ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0] ; SSE41-NEXT: pextrq $1, %xmm2, %rax ; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: js .LBB80_22 -; SSE41-NEXT: # %bb.23: +; SSE41-NEXT: js .LBB80_23 +; SSE41-NEXT: # %bb.22: ; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] ; SSE41-NEXT: retq -; SSE41-NEXT: .LBB80_22: +; SSE41-NEXT: .LBB80_23: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx ; SSE41-NEXT: andl $1, %eax @@ -5177,11 +5185,11 @@ ; AVX1-NEXT: vmovdqa 32(%rdi), %ymm2 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_1 -; AVX1-NEXT: # %bb.2: +; AVX1-NEXT: js .LBB80_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX1-NEXT: jmp .LBB80_3 -; AVX1-NEXT: .LBB80_1: +; AVX1-NEXT: .LBB80_2: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5191,11 +5199,11 @@ ; AVX1-NEXT: .LBB80_3: ; AVX1-NEXT: vmovq %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_4 -; AVX1-NEXT: # %bb.5: +; AVX1-NEXT: js .LBB80_5 +; AVX1-NEXT: # %bb.4: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm4 ; AVX1-NEXT: jmp .LBB80_6 -; AVX1-NEXT: .LBB80_4: +; AVX1-NEXT: .LBB80_5: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5206,11 +5214,11 @@ ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 ; AVX1-NEXT: vmovq %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_7 -; AVX1-NEXT: # %bb.8: +; AVX1-NEXT: js .LBB80_8 +; AVX1-NEXT: # %bb.7: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm3 ; AVX1-NEXT: jmp .LBB80_9 -; AVX1-NEXT: .LBB80_7: +; AVX1-NEXT: .LBB80_8: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5220,11 +5228,11 @@ ; AVX1-NEXT: .LBB80_9: ; AVX1-NEXT: vpextrq $1, %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_10 -; AVX1-NEXT: # %bb.11: +; AVX1-NEXT: js .LBB80_11 +; AVX1-NEXT: # %bb.10: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2 ; AVX1-NEXT: jmp .LBB80_12 -; AVX1-NEXT: .LBB80_10: +; AVX1-NEXT: .LBB80_11: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5234,11 +5242,11 @@ ; AVX1-NEXT: .LBB80_12: ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_13 -; AVX1-NEXT: # %bb.14: +; AVX1-NEXT: js .LBB80_14 +; AVX1-NEXT: # %bb.13: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5 ; AVX1-NEXT: jmp .LBB80_15 -; AVX1-NEXT: .LBB80_13: +; AVX1-NEXT: .LBB80_14: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5249,11 +5257,11 @@ ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[2,3] ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_16 -; AVX1-NEXT: # %bb.17: +; AVX1-NEXT: js .LBB80_17 +; AVX1-NEXT: # %bb.16: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm4 ; AVX1-NEXT: jmp .LBB80_18 -; AVX1-NEXT: .LBB80_16: +; AVX1-NEXT: .LBB80_17: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5266,11 +5274,11 @@ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vmovq %xmm3, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_19 -; AVX1-NEXT: # %bb.20: +; AVX1-NEXT: js .LBB80_20 +; AVX1-NEXT: # %bb.19: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0 ; AVX1-NEXT: jmp .LBB80_21 -; AVX1-NEXT: .LBB80_19: +; AVX1-NEXT: .LBB80_20: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5282,11 +5290,11 @@ ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0] ; AVX1-NEXT: vpextrq $1, %xmm3, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB80_22 -; AVX1-NEXT: # %bb.23: +; AVX1-NEXT: js .LBB80_23 +; AVX1-NEXT: # %bb.22: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm1 ; AVX1-NEXT: jmp .LBB80_24 -; AVX1-NEXT: .LBB80_22: +; AVX1-NEXT: .LBB80_23: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -5304,11 +5312,11 @@ ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm2 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_1 -; AVX2-NEXT: # %bb.2: +; AVX2-NEXT: js .LBB80_2 +; AVX2-NEXT: # %bb.1: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX2-NEXT: jmp .LBB80_3 -; AVX2-NEXT: .LBB80_1: +; AVX2-NEXT: .LBB80_2: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -5318,11 +5326,11 @@ ; AVX2-NEXT: .LBB80_3: ; AVX2-NEXT: vmovq %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_4 -; AVX2-NEXT: # %bb.5: +; AVX2-NEXT: js .LBB80_5 +; AVX2-NEXT: # %bb.4: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm4 ; AVX2-NEXT: jmp .LBB80_6 -; AVX2-NEXT: .LBB80_4: +; AVX2-NEXT: .LBB80_5: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -5333,11 +5341,11 @@ ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX2-NEXT: vmovq %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_7 -; AVX2-NEXT: # %bb.8: +; AVX2-NEXT: js .LBB80_8 +; AVX2-NEXT: # %bb.7: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm3 ; AVX2-NEXT: jmp .LBB80_9 -; AVX2-NEXT: .LBB80_7: +; AVX2-NEXT: .LBB80_8: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -5347,11 +5355,11 @@ ; AVX2-NEXT: .LBB80_9: ; AVX2-NEXT: vpextrq $1, %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_10 -; AVX2-NEXT: # %bb.11: +; AVX2-NEXT: js .LBB80_11 +; AVX2-NEXT: # %bb.10: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2 ; AVX2-NEXT: jmp .LBB80_12 -; AVX2-NEXT: .LBB80_10: +; AVX2-NEXT: .LBB80_11: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -5361,11 +5369,11 @@ ; AVX2-NEXT: .LBB80_12: ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_13 -; AVX2-NEXT: # %bb.14: +; AVX2-NEXT: js .LBB80_14 +; AVX2-NEXT: # %bb.13: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5 ; AVX2-NEXT: jmp .LBB80_15 -; AVX2-NEXT: .LBB80_13: +; AVX2-NEXT: .LBB80_14: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -5376,11 +5384,11 @@ ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[2,3] ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_16 -; AVX2-NEXT: # %bb.17: +; AVX2-NEXT: js .LBB80_17 +; AVX2-NEXT: # %bb.16: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm4 ; AVX2-NEXT: jmp .LBB80_18 -; AVX2-NEXT: .LBB80_16: +; AVX2-NEXT: .LBB80_17: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -5393,11 +5401,11 @@ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 ; AVX2-NEXT: vmovq %xmm3, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_19 -; AVX2-NEXT: # %bb.20: +; AVX2-NEXT: js .LBB80_20 +; AVX2-NEXT: # %bb.19: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0 ; AVX2-NEXT: jmp .LBB80_21 -; AVX2-NEXT: .LBB80_19: +; AVX2-NEXT: .LBB80_20: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -5409,11 +5417,11 @@ ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0] ; AVX2-NEXT: vpextrq $1, %xmm3, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB80_22 -; AVX2-NEXT: # %bb.23: +; AVX2-NEXT: js .LBB80_23 +; AVX2-NEXT: # %bb.22: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm1 ; AVX2-NEXT: jmp .LBB80_24 -; AVX2-NEXT: .LBB80_22: +; AVX2-NEXT: .LBB80_23: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ test/CodeGen/X86/x86-shrink-wrapping.ll @@ -806,11 +806,11 @@ ; ; Load the value of b. ; Create the zero value for the select assignment. -; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]] -; CHECK-NEXT: cmpb $0, _b(%rip) -; CHECK-NEXT: jne [[STOREC_LABEL:LBB[0-9_]+]] +; CHECK: cmpb $0, _b(%rip) +; CHECK-NEXT: movb $48, [[CMOVE_VAL:%al]] +; CHECK-NEXT: je [[STOREC_LABEL:LBB[0-9_]+]] ; -; CHECK: movb $48, [[CMOVE_VAL:%al]] +; CHECK: movl $0, [[CMOVE_VAL:%eax]] ; ; CHECK: [[STOREC_LABEL]]: ;