Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -16437,6 +16437,24 @@
   return SDValue();
 }
 
+/// Find a user of \p Op that is an X86ISD::CMP of \p Op against constant
+/// zero, i.e. the node pattern that is selected as TEST.
+///
+/// \returns result 0 of the first matching CMP user, or an empty SDValue
+/// when no user matches.
+static SDValue lookupCmpWithZeroUse(SDValue Op) {
+  for (SDNode *U : Op->uses()) {
+    // The CMP must compare this very value, not merely use its node.
+    if (U->getOpcode() != X86ISD::CMP || U->getOperand(0) != Op)
+      continue;
+    // The RHS may be a non-constant, in which case dyn_cast yields null.
+    auto *CN = dyn_cast<ConstantSDNode>(U->getOperand(1));
+    if (CN && CN->getAPIntValue() == 0)
+      return SDValue(U, 0);
+  }
+  return SDValue();
+}
+
 /// Emit nodes that will be selected as "test Op0,Op0", or something
 /// equivalent.
 SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
@@ -16639,6 +16657,17 @@
 
         break;
       }
+    } else if (SDValue V = lookupCmpWithZeroUse(Op)) {
+      // If we found that AND instruction already has a use in form of
+      // CMP with 0 (TEST pattern) - use this as the condition. The
+      // common example of that is when we lower two selects that
+      // use the same AND as the condition:
+      //   %c = and i8 %cond, 1
+      //   %0 = select i1 %c, i32 a, i32 b
+      //   %1 = select i1 %c, i32 c, i32 d
+      // - When we lower first select, we do emit TEST pattern, which we
+      //   will use for the second select here.
+ return V; } LLVM_FALLTHROUGH; case ISD::SUB: Index: test/CodeGen/X86/atomic-minmax-i6432.ll =================================================================== --- test/CodeGen/X86/atomic-minmax-i6432.ll +++ test/CodeGen/X86/atomic-minmax-i6432.ll @@ -9,32 +9,32 @@ ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl ; LINUX: sbbl -; LINUX: jne -; LINUX: jne +; LINUX: cmovll +; LINUX: cmovll ; LINUX: lock cmpxchg8b ; LINUX: jne [[LABEL]] %2 = atomicrmw min i64* @sc64, i64 6 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl ; LINUX: sbbl -; LINUX: jne -; LINUX: jne +; LINUX: cmovll +; LINUX: cmovll ; LINUX: lock cmpxchg8b ; LINUX: jne [[LABEL]] %3 = atomicrmw umax i64* @sc64, i64 7 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl ; LINUX: sbbl -; LINUX: jne -; LINUX: jne +; LINUX: cmovbl +; LINUX: cmovbl ; LINUX: lock cmpxchg8b ; LINUX: jne [[LABEL]] %4 = atomicrmw umin i64* @sc64, i64 8 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl ; LINUX: sbbl -; LINUX: jne -; LINUX: jne +; LINUX: cmovbl +; LINUX: cmovbl ; LINUX: lock cmpxchg8b ; LINUX: jne [[LABEL]] ret void Index: test/CodeGen/X86/atomic128.ll =================================================================== --- test/CodeGen/X86/atomic128.ll +++ test/CodeGen/X86/atomic128.ll @@ -165,26 +165,13 @@ ; CHECK-NEXT: cmpq %rax, %rsi ; CHECK-NEXT: movq %r8, %rcx ; CHECK-NEXT: sbbq %rdx, %rcx -; CHECK-NEXT: setge %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB5_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB5_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne LBB5_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: LBB5_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: 
Header=BB5_1 Depth=1 +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: cmovgeq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovgeq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB5_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx @@ -211,26 +198,13 @@ ; CHECK-NEXT: cmpq %rsi, %rax ; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: sbbq %r8, %rcx -; CHECK-NEXT: setge %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB6_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB6_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne LBB6_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: LBB6_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 +; CHECK-NEXT: cmovgeq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovgeq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB6_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx @@ -257,26 +231,13 @@ ; CHECK-NEXT: cmpq %rax, %rsi ; CHECK-NEXT: movq %r8, %rcx ; CHECK-NEXT: sbbq %rdx, %rcx -; CHECK-NEXT: setae %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB7_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB7_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne 
LBB7_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: LBB7_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 +; CHECK-NEXT: cmovaeq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovaeq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB7_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx @@ -303,26 +264,13 @@ ; CHECK-NEXT: cmpq %rax, %rsi ; CHECK-NEXT: movq %r8, %rcx ; CHECK-NEXT: sbbq %rdx, %rcx -; CHECK-NEXT: setb %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB8_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB8_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne LBB8_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: LBB8_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: cmovbq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovbq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB8_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx Index: test/CodeGen/X86/cmov.ll =================================================================== --- test/CodeGen/X86/cmov.ll +++ test/CodeGen/X86/cmov.ll @@ -218,3 +218,22 @@ ret i32 %sel } +; CHECK-LABEL: test8: +; CHECK: # BB#0: +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: movl $31415, %ecx +; CHECK-NEXT: 
cmovnel %eax, %ecx +; CHECK-NEXT: movl $4048, %edx +; CHECK-NEXT: movl $666, %eax +; CHECK-NEXT: cmovnel %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: retq + +define i32 @test8(i1 %c) { +entry: + %0 = select i1 %c, i32 42, i32 31415 + %1 = select i1 %c, i32 4048, i32 666 + %ret = add i32 %0, %1 + ret i32 %ret +} Index: test/CodeGen/X86/cmp.ll =================================================================== --- test/CodeGen/X86/cmp.ll +++ test/CodeGen/X86/cmp.ll @@ -275,7 +275,7 @@ ret void ; CHECK-LABEL: test20 -; CHECK: andl +; CHECK: testl ; CHECK: setne ; CHECK: addl ; CHECK: setne Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -166,19 +166,15 @@ ; MCU-LABEL: test5: ; MCU: # BB#0: ; MCU-NEXT: pushl %esi -; MCU-NEXT: andb $1, %al +; MCU-NEXT: movl {{[0-9]+}}(%esp), %esi +; MCU-NEXT: testb $1, %al ; MCU-NEXT: jne .LBB4_2 ; MCU-NEXT: # BB#1: +; MCU-NEXT: movw {{[0-9]+}}(%esp), %cx ; MCU-NEXT: movw {{[0-9]+}}(%esp), %dx ; MCU-NEXT: .LBB4_2: -; MCU-NEXT: movl {{[0-9]+}}(%esp), %esi -; MCU-NEXT: testb %al, %al -; MCU-NEXT: jne .LBB4_4 -; MCU-NEXT: # BB#3: -; MCU-NEXT: movw {{[0-9]+}}(%esp), %cx -; MCU-NEXT: .LBB4_4: -; MCU-NEXT: movw %dx, (%esi) ; MCU-NEXT: movw %cx, 2(%esi) +; MCU-NEXT: movw %dx, (%esi) ; MCU-NEXT: popl %esi ; MCU-NEXT: retl %x = select i1 %c, <2 x i16> %a, <2 x i16> %b @@ -292,81 +288,69 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind { ; GENERIC-LABEL: test8: ; GENERIC: ## BB#0: -; GENERIC-NEXT: andb $1, %dil +; GENERIC-NEXT: testb $1, %dil ; GENERIC-NEXT: jne LBB7_1 ; GENERIC-NEXT: ## BB#2: -; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; GENERIC-NEXT: jmp LBB7_3 -; GENERIC-NEXT: LBB7_1: ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; GENERIC-NEXT: LBB7_3: -; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; GENERIC-NEXT: testb %dil, %dil -; GENERIC-NEXT: jne LBB7_4 -; GENERIC-NEXT: ## BB#5: +; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; GENERIC-NEXT: jmp LBB7_3 +; GENERIC-NEXT: LBB7_1: +; GENERIC-NEXT: movd %r9d, %xmm0 +; GENERIC-NEXT: movd %r8d, %xmm1 +; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; GENERIC-NEXT: movd %ecx, %xmm2 +; GENERIC-NEXT: movd %edx, %xmm0 +; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; GENERIC-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; GENERIC-NEXT: jmp LBB7_6 -; GENERIC-NEXT: LBB7_4: -; GENERIC-NEXT: movd %r9d, %xmm1 -; GENERIC-NEXT: movd %r8d, %xmm2 -; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; GENERIC-NEXT: movd %ecx, %xmm3 -; GENERIC-NEXT: movd %edx, %xmm1 -; GENERIC-NEXT: LBB7_6: -; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; GENERIC-NEXT: LBB7_3: +; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; GENERIC-NEXT: pcmpeqd %xmm2, %xmm2 -; GENERIC-NEXT: paddd %xmm2, %xmm1 ; GENERIC-NEXT: paddd %xmm2, %xmm0 -; GENERIC-NEXT: movq %xmm0, 16(%rsi) -; GENERIC-NEXT: movdqa %xmm1, (%rsi) +; GENERIC-NEXT: paddd %xmm2, %xmm1 +; 
GENERIC-NEXT: movq %xmm1, 16(%rsi) +; GENERIC-NEXT: movdqa %xmm0, (%rsi) ; GENERIC-NEXT: retq ; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test8: ; ATOM: ## BB#0: -; ATOM-NEXT: andb $1, %dil +; ATOM-NEXT: testb $1, %dil ; ATOM-NEXT: jne LBB7_1 ; ATOM-NEXT: ## BB#2: -; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; ATOM-NEXT: jmp LBB7_3 -; ATOM-NEXT: LBB7_1: -; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; ATOM-NEXT: LBB7_3: -; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; ATOM-NEXT: testb %dil, %dil -; ATOM-NEXT: jne LBB7_4 -; ATOM-NEXT: ## BB#5: ; ATOM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; ATOM-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] -; ATOM-NEXT: jmp LBB7_6 -; ATOM-NEXT: LBB7_4: -; ATOM-NEXT: movd %r9d, %xmm1 +; ATOM-NEXT: jmp LBB7_3 +; ATOM-NEXT: LBB7_1: +; ATOM-NEXT: movd %r9d, %xmm0 ; ATOM-NEXT: movd %r8d, %xmm2 -; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; ATOM-NEXT: movd %ecx, %xmm3 -; ATOM-NEXT: movd %edx, %xmm1 -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; ATOM-NEXT: LBB7_6: +; ATOM-NEXT: movd %edx, %xmm0 +; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ATOM-NEXT: LBB7_3: +; ATOM-NEXT: 
punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; ATOM-NEXT: pcmpeqd %xmm2, %xmm2 +; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] ; ATOM-NEXT: paddd %xmm2, %xmm0 ; ATOM-NEXT: paddd %xmm2, %xmm1 -; ATOM-NEXT: movq %xmm0, 16(%rsi) -; ATOM-NEXT: movdqa %xmm1, (%rsi) +; ATOM-NEXT: movq %xmm1, 16(%rsi) +; ATOM-NEXT: movdqa %xmm0, (%rsi) ; ATOM-NEXT: retq ; ATOM-NEXT: ## -- End function ; @@ -376,68 +360,67 @@ ; MCU-NEXT: pushl %ebx ; MCU-NEXT: pushl %edi ; MCU-NEXT: pushl %esi -; MCU-NEXT: andb $1, %al +; MCU-NEXT: testb $1, %al ; MCU-NEXT: jne .LBB7_1 ; MCU-NEXT: # BB#2: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx -; MCU-NEXT: movl (%ecx), %ecx +; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax +; MCU-NEXT: movl (%eax), %eax ; MCU-NEXT: je .LBB7_5 ; MCU-NEXT: .LBB7_4: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi -; MCU-NEXT: movl (%esi), %esi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx +; MCU-NEXT: movl (%ecx), %ecx ; MCU-NEXT: je .LBB7_8 ; MCU-NEXT: .LBB7_7: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi -; MCU-NEXT: movl (%edi), %edi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi +; MCU-NEXT: movl (%esi), %esi ; MCU-NEXT: je .LBB7_11 ; MCU-NEXT: .LBB7_10: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx -; MCU-NEXT: movl (%ebx), %ebx +; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi +; MCU-NEXT: movl (%edi), %edi ; MCU-NEXT: je .LBB7_14 ; MCU-NEXT: .LBB7_13: +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx +; MCU-NEXT: movl (%ebx), %ebx +; MCU-NEXT: je .LBB7_17 +; MCU-NEXT: .LBB7_16: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp -; MCU-NEXT: jmp .LBB7_15 +; MCU-NEXT: jmp .LBB7_18 ; MCU-NEXT: .LBB7_1: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx -; MCU-NEXT: movl (%ecx), %ecx +; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax +; MCU-NEXT: movl (%eax), %eax ; MCU-NEXT: jne .LBB7_4 ; MCU-NEXT: .LBB7_5: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi -; MCU-NEXT: movl (%esi), %esi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx +; MCU-NEXT: movl (%ecx), %ecx ; MCU-NEXT: 
jne .LBB7_7 ; MCU-NEXT: .LBB7_8: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi -; MCU-NEXT: movl (%edi), %edi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi +; MCU-NEXT: movl (%esi), %esi ; MCU-NEXT: jne .LBB7_10 ; MCU-NEXT: .LBB7_11: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx -; MCU-NEXT: movl (%ebx), %ebx +; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi +; MCU-NEXT: movl (%edi), %edi ; MCU-NEXT: jne .LBB7_13 ; MCU-NEXT: .LBB7_14: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp -; MCU-NEXT: .LBB7_15: -; MCU-NEXT: movl (%ebp), %ebp -; MCU-NEXT: testb %al, %al +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx +; MCU-NEXT: movl (%ebx), %ebx ; MCU-NEXT: jne .LBB7_16 -; MCU-NEXT: # BB#17: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax -; MCU-NEXT: jmp .LBB7_18 -; MCU-NEXT: .LBB7_16: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax +; MCU-NEXT: .LBB7_17: +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp ; MCU-NEXT: .LBB7_18: -; MCU-NEXT: movl (%eax), %eax -; MCU-NEXT: decl %eax +; MCU-NEXT: movl (%ebp), %ebp ; MCU-NEXT: decl %ebp ; MCU-NEXT: decl %ebx ; MCU-NEXT: decl %edi ; MCU-NEXT: decl %esi ; MCU-NEXT: decl %ecx -; MCU-NEXT: movl %ecx, 20(%edx) -; MCU-NEXT: movl %esi, 16(%edx) -; MCU-NEXT: movl %edi, 12(%edx) -; MCU-NEXT: movl %ebx, 8(%edx) -; MCU-NEXT: movl %ebp, 4(%edx) -; MCU-NEXT: movl %eax, (%edx) +; MCU-NEXT: decl %eax +; MCU-NEXT: movl %eax, 20(%edx) +; MCU-NEXT: movl %ecx, 16(%edx) +; MCU-NEXT: movl %esi, 12(%edx) +; MCU-NEXT: movl %edi, 8(%edx) +; MCU-NEXT: movl %ebx, 4(%edx) +; MCU-NEXT: movl %ebp, (%edx) ; MCU-NEXT: popl %esi ; MCU-NEXT: popl %edi ; MCU-NEXT: popl %ebx