Index: lib/Target/X86/X86RegisterInfo.h =================================================================== --- lib/Target/X86/X86RegisterInfo.h +++ lib/Target/X86/X86RegisterInfo.h @@ -95,6 +95,8 @@ unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + bool enableMultipleCopyHints() const override { return true; } + /// getCalleeSavedRegs - Return a null-terminated list of all of the /// callee-save registers on this target. const MCPhysReg * Index: test/CodeGen/X86/GlobalISel/add-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/add-scalar.ll +++ test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -63,8 +63,9 @@ define i8 @test_add_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_add_i8: ; X64: # %bb.0: -; X64-NEXT: addb %dil, %sil ; X64-NEXT: movl %esi, %eax +; X64-NEXT: addb %dil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: test_add_i8: Index: test/CodeGen/X86/GlobalISel/and-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/and-scalar.ll +++ test/CodeGen/X86/GlobalISel/and-scalar.ll @@ -19,8 +19,9 @@ define i8 @test_and_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_and_i8: ; ALL: # %bb.0: -; ALL-NEXT: andb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andb %dil, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %ret = and i8 %arg1, %arg2 ret i8 %ret @@ -29,8 +30,9 @@ define i16 @test_and_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_and_i16: ; ALL: # %bb.0: -; ALL-NEXT: andw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq %ret = and i16 %arg1, %arg2 ret i16 %ret @@ -39,8 +41,8 @@ define i32 @test_and_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_and_i32: ; ALL: # %bb.0: -; ALL-NEXT: andl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andl %edi, %eax ; ALL-NEXT: retq %ret = and i32 %arg1, %arg2 ret i32 %ret @@ -49,8 +51,8 @@ define i64 @test_and_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_and_i64: ; ALL: # %bb.0: -; ALL-NEXT: andq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: andq %rdi, %rax ; ALL-NEXT: retq %ret = and i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/binop.ll =================================================================== --- test/CodeGen/X86/GlobalISel/binop.ll +++ test/CodeGen/X86/GlobalISel/binop.ll @@ -7,8 +7,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_sub_i64: ; ALL: # %bb.0: -; ALL-NEXT: subq %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: subq %rsi, %rax ; ALL-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -17,8 +17,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_sub_i32: ; ALL: # %bb.0: -; ALL-NEXT: subl %esi, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: subl %esi, %eax ; ALL-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret Index: test/CodeGen/X86/GlobalISel/callingconv.ll =================================================================== --- test/CodeGen/X86/GlobalISel/callingconv.ll +++ test/CodeGen/X86/GlobalISel/callingconv.ll @@ -38,6 +38,7 @@ ; X64-LABEL: test_arg_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ret i8 %a } @@ -51,6 +52,7 @@ ; X64-LABEL: test_arg_i16: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ret i16 %a } @@ -114,8 +116,8 @@ ; X32: # %bb.0: ; X32-NEXT: subl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 16 -; X32-NEXT: movups {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: movaps %xmm2, %xmm0 +; X32-NEXT: movups {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: addl $12, %esp ; X32-NEXT: retl ; @@ -248,8 +250,8 @@ ; X32-NEXT: .cfi_def_cfa_offset 48 ; X32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill ; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: movdqu {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: movdqa %xmm2, %xmm0 +; X32-NEXT: movdqu {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: calll split_return_callee ; X32-NEXT: paddd (%esp), %xmm0 # 16-byte Folded Reload ; X32-NEXT: paddd {{[0-9]+}}(%esp), %xmm1 # 16-byte Folded Reload Index: test/CodeGen/X86/GlobalISel/ext-x86-64.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext-x86-64.ll +++ test/CodeGen/X86/GlobalISel/ext-x86-64.ll @@ -6,9 +6,8 @@ define i64 @test_zext_i1(i8 %a) { ; X64-LABEL: test_zext_i1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andq $1, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andq $1, %rax ; X64-NEXT: retq %val = trunc i8 %a to i1 %r = zext i1 %val to i64 Index: test/CodeGen/X86/GlobalISel/ext.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext.ll +++ test/CodeGen/X86/GlobalISel/ext.ll @@ -5,8 +5,9 @@ define i8 @test_zext_i1toi8(i32 %a) { ; X64-LABEL: test_zext_i1toi8: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi8: @@ -23,8 +24,9 @@ define i16 @test_zext_i1toi16(i32 %a) { ; X64-LABEL: test_zext_i1toi16: ; X64: # %bb.0: -; X64-NEXT: andw $1, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andw $1, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi16: @@ -41,8 +43,8 @@ define i32 @test_zext_i1(i32 %a) { ; X64-LABEL: test_zext_i1: ; X64: # %bb.0: -; X64-NEXT: andl $1, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1: Index: test/CodeGen/X86/GlobalISel/memop-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/memop-scalar.ll +++ test/CodeGen/X86/GlobalISel/memop-scalar.ll @@ -82,9 +82,9 @@ define i1 * @test_store_i1(i1 %val, i1 * %p1) { ; ALL-LABEL: test_store_i1: ; ALL: # %bb.0: -; ALL-NEXT: andb $1, %dil -; ALL-NEXT: movb %dil, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: andb $1, %dil +; ALL-NEXT: movb %dil, (%rax) ; ALL-NEXT: retq store i1 %val, i1* %p1 ret i1 * %p1; @@ -93,8 +93,8 @@ define i32 * @test_store_i32(i32 %val, i32 * %p1) { ; ALL-LABEL: test_store_i32: ; ALL: # %bb.0: -; ALL-NEXT: movl %edi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movl %edi, (%rax) ; ALL-NEXT: retq store i32 %val, i32* %p1 ret i32 * %p1; @@ -103,8 +103,8 @@ define i64 * @test_store_i64(i64 %val, i64 * %p1) { ; ALL-LABEL: test_store_i64: ; ALL: # %bb.0: -; ALL-NEXT: movq %rdi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movq %rdi, (%rax) ; ALL-NEXT: retq store i64 %val, i64* %p1 ret i64 * %p1; @@ -114,15 +114,15 @@ ; ; SSE_FAST-LABEL: test_store_float: ; SSE_FAST: # %bb.0: -; SSE_FAST-NEXT: movd %xmm0, %eax -; SSE_FAST-NEXT: movl %eax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movd %xmm0, %ecx +; SSE_FAST-NEXT: movl %ecx, (%rax) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_float: ; SSE_GREEDY: # %bb.0: -; SSE_GREEDY-NEXT: movss %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movss %xmm0, (%rax) ; SSE_GREEDY-NEXT: retq store float %val, float* %p1 ret float * %p1; @@ -132,15 +132,15 @@ ; ; SSE_FAST-LABEL: test_store_double: ; SSE_FAST: # %bb.0: -; SSE_FAST-NEXT: movq %xmm0, %rax -; SSE_FAST-NEXT: movq %rax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movq %xmm0, %rcx +; SSE_FAST-NEXT: movq %rcx, (%rax) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_double: ; SSE_GREEDY: # %bb.0: -; SSE_GREEDY-NEXT: movsd %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movsd %xmm0, (%rax) ; SSE_GREEDY-NEXT: retq store double %val, double* %p1 ret double * %p1; Index: test/CodeGen/X86/GlobalISel/mul-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/mul-scalar.ll +++ test/CodeGen/X86/GlobalISel/mul-scalar.ll @@ -8,31 +8,32 @@ ;} define i16 @test_mul_i16(i16 %arg1, i16 %arg2) { -; X64-LABEL: test_mul_i16: -; X64: # %bb.0: -; X64-NEXT: imulw %di, %si -; X64-NEXT: movl %esi, %eax -; X64-NEXT: retq +; ALL-LABEL: test_mul_i16: +; ALL: # %bb.0: +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: imulw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax +; ALL-NEXT: retq %ret = mul i16 %arg1, %arg2 ret i16 %ret } define i32 @test_mul_i32(i32 %arg1, i32 %arg2) { -; X64-LABEL: test_mul_i32: -; X64: # %bb.0: -; X64-NEXT: imull %edi, %esi -; X64-NEXT: movl %esi, %eax -; X64-NEXT: retq +; ALL-LABEL: test_mul_i32: +; ALL: # %bb.0: +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: imull %edi, %eax +; ALL-NEXT: retq %ret = mul i32 %arg1, %arg2 ret i32 %ret } define i64 @test_mul_i64(i64 %arg1, i64 %arg2) { -; X64-LABEL: test_mul_i64: -; X64: # %bb.0: -; X64-NEXT: imulq %rdi, %rsi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: retq +; ALL-LABEL: test_mul_i64: +; ALL: # %bb.0: +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: imulq %rdi, %rax +; ALL-NEXT: retq %ret = mul i64 %arg1, %arg2 ret i64 %ret } Index: test/CodeGen/X86/GlobalISel/or-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/or-scalar.ll +++ test/CodeGen/X86/GlobalISel/or-scalar.ll @@ -19,8 +19,9 @@ define i8 @test_or_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_or_i8: ; ALL: # %bb.0: -; ALL-NEXT: orb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orb %dil, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %ret = or i8 %arg1, %arg2 ret i8 %ret @@ -29,8 +30,9 @@ define i16 @test_or_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_or_i16: ; ALL: # %bb.0: -; ALL-NEXT: orw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq %ret = or i16 %arg1, %arg2 ret i16 %ret @@ -39,8 +41,8 @@ define i32 @test_or_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_or_i32: ; ALL: # %bb.0: -; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orl %edi, %eax ; ALL-NEXT: retq %ret = or i32 %arg1, %arg2 ret i32 %ret @@ -49,8 +51,8 @@ define i64 @test_or_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_or_i64: ; ALL: # %bb.0: -; ALL-NEXT: orq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: orq %rdi, %rax ; ALL-NEXT: retq %ret = or i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/phi.ll =================================================================== --- test/CodeGen/X86/GlobalISel/phi.ll +++ test/CodeGen/X86/GlobalISel/phi.ll @@ -4,15 +4,16 @@ define i8 @test_i8(i32 %a, i8 %f, i8 %t) { ; ALL-LABEL: test_i8: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB0_2 ; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB0_2: # %cond.end -; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -32,15 +33,16 @@ define i16 @test_i16(i32 %a, i16 %f, i16 %t) { ; ALL-LABEL: test_i16: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB1_2 ; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB1_2: # %cond.end -; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -60,15 +62,15 @@ define i32 @test_i32(i32 %a, i32 %f, i32 %t) { ; ALL-LABEL: test_i32: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB2_2 ; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB2_2: # %cond.end -; ALL-NEXT: movl %esi, %eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -88,15 +90,15 @@ define i64 @test_i64(i32 %a, i64 %f, i64 %t) { ; ALL-LABEL: test_i64: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB3_2 ; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movq %rdx, %rsi +; ALL-NEXT: movq %rdx, %rax ; ALL-NEXT: .LBB3_2: # %cond.end -; ALL-NEXT: movq %rsi, %rax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 Index: test/CodeGen/X86/GlobalISel/sub-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -4,8 +4,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_sub_i64: ; X64: # %bb.0: -; X64-NEXT: subq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax ; X64-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -14,8 +14,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_sub_i32: ; X64: # %bb.0: -; X64-NEXT: subl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax ; X64-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret @@ -24,8 +24,9 @@ define i16 @test_sub_i16(i16 %arg1, i16 %arg2) { ; X64-LABEL: test_sub_i16: ; X64: # %bb.0: -; X64-NEXT: subw %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subw %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %ret = sub i16 %arg1, %arg2 ret i16 %ret @@ -34,8 +35,9 @@ define i8 @test_sub_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_sub_i8: ; X64: # %bb.0: -; X64-NEXT: subb %sil, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subb %sil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %ret = sub i8 %arg1, %arg2 ret i8 %ret Index: test/CodeGen/X86/GlobalISel/trunc.ll =================================================================== --- test/CodeGen/X86/GlobalISel/trunc.ll +++ test/CodeGen/X86/GlobalISel/trunc.ll @@ -5,6 +5,7 @@ ; CHECK-LABEL: trunc_i32toi1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r = trunc i32 %a to i1 ret i1 %r @@ -14,6 +15,7 @@ ; CHECK-LABEL: trunc_i32toi8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r = trunc i32 %a to i8 ret i8 %r @@ -23,6 +25,7 @@ ; CHECK-LABEL: trunc_i32toi16: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %r = trunc i32 %a to i16 ret i16 %r @@ -31,7 +34,8 @@ define i8 @trunc_i64toi8(i64 %a) { ; CHECK-LABEL: trunc_i64toi8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $al killed $al killed $rax ; CHECK-NEXT: retq %r = trunc i64 %a to i8 ret i8 %r @@ -40,7 +44,8 @@ define i16 @trunc_i64toi16(i64 %a) { ; CHECK-LABEL: trunc_i64toi16: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $ax killed $ax killed $rax ; CHECK-NEXT: retq %r = trunc i64 %a to i16 ret i16 %r @@ -49,7 +54,8 @@ define i32 @trunc_i64toi32(i64 %a) { ; CHECK-LABEL: trunc_i64toi32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq %r = trunc i64 %a to i32 ret i32 %r Index: test/CodeGen/X86/GlobalISel/undef.ll =================================================================== --- test/CodeGen/X86/GlobalISel/undef.ll +++ test/CodeGen/X86/GlobalISel/undef.ll @@ -11,8 +11,9 @@ define i8 @test2(i8 %a) { ; ALL-LABEL: test2: ; ALL: # %bb.0: -; ALL-NEXT: addb %al, %dil ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: addb %al, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %r = add i8 %a, undef ret i8 %r Index: test/CodeGen/X86/GlobalISel/xor-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/xor-scalar.ll +++ test/CodeGen/X86/GlobalISel/xor-scalar.ll @@ -19,8 +19,9 @@ define i8 @test_xor_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_xor_i8: ; ALL: # %bb.0: -; ALL-NEXT: xorb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorb %dil, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %ret = xor i8 %arg1, %arg2 ret i8 %ret @@ -29,8 +30,9 @@ define i16 @test_xor_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_xor_i16: ; ALL: # %bb.0: -; ALL-NEXT: xorw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq %ret = xor i16 %arg1, %arg2 ret i16 %ret @@ -39,8 +41,8 @@ define i32 @test_xor_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_xor_i32: ; ALL: # %bb.0: -; ALL-NEXT: xorl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %edi, %eax ; ALL-NEXT: retq %ret = xor i32 %arg1, %arg2 ret i32 %ret @@ -49,8 +51,8 @@ define i64 @test_xor_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_xor_i64: ; ALL: # %bb.0: -; ALL-NEXT: xorq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorq %rdi, %rax ; ALL-NEXT: retq %ret = xor i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/add.ll =================================================================== --- test/CodeGen/X86/add.ll +++ test/CodeGen/X86/add.ll @@ -16,14 +16,14 @@ ; ; X64-LINUX-LABEL: test1: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: subl $-128, %edi ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: subl $-128, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test1: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: subl $-128, %ecx ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: subl $-128, %eax ; X64-WIN32-NEXT: retq entry: %b = add i32 %a, 128 @@ -38,14 +38,14 @@ ; ; X64-LINUX-LABEL: test2: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: subq $-2147483648, %rdi # imm = 0x80000000 ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test2: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: subq $-2147483648, %rcx # imm = 0x80000000 ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 2147483648 @@ -60,14 +60,14 @@ ; ; X64-LINUX-LABEL: test3: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: subq $-128, %rdi ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-128, %rax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test3: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: subq $-128, %rcx ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-128, %rax ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 128 @@ -204,16 +204,16 @@ ; ; X64-LINUX-LABEL: test7: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: addl %esi, %edi -; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: addl %esi, %eax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test7: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: addl %edx, %ecx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: addl %edx, %eax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -233,16 +233,16 @@ ; ; X64-LINUX-LABEL: test8: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: addq %rsi, %rdi -; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: addq %rsi, %rax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test8: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: addq %rdx, %rcx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: addq %rdx, %rax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %extleft = zext i64 %left to i65 @@ -268,20 +268,20 @@ ; ; X64-LINUX-LABEL: test9: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: xorl %eax, %eax -; X64-LINUX-NEXT: cmpl $10, %edi -; X64-LINUX-NEXT: sete %al -; X64-LINUX-NEXT: subl %eax, %esi ; X64-LINUX-NEXT: movl %esi, %eax +; X64-LINUX-NEXT: xorl %ecx, %ecx +; X64-LINUX-NEXT: cmpl $10, %edi +; X64-LINUX-NEXT: sete %cl +; X64-LINUX-NEXT: subl %ecx, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test9: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: xorl %eax, %eax -; X64-WIN32-NEXT: cmpl $10, %ecx -; X64-WIN32-NEXT: sete %al -; X64-WIN32-NEXT: subl %eax, %edx ; X64-WIN32-NEXT: movl %edx, %eax +; X64-WIN32-NEXT: xorl %edx, %edx +; X64-WIN32-NEXT: cmpl $10, %ecx +; X64-WIN32-NEXT: sete %dl +; X64-WIN32-NEXT: subl %edx, %eax ; X64-WIN32-NEXT: retq entry: %cmp = icmp eq i32 %x, 10 Index: test/CodeGen/X86/addcarry.ll =================================================================== --- test/CodeGen/X86/addcarry.ll +++ test/CodeGen/X86/addcarry.ll @@ -110,15 +110,15 @@ define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) { ; CHECK-LABEL: pr31719: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx ; CHECK-NEXT: adcq 8(%rsi), %rcx ; CHECK-NEXT: adcq 16(%rsi), %r8 ; CHECK-NEXT: adcq 24(%rsi), %r9 -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r8, 16(%rdi) -; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %scalar %arg.b, 0 @@ -205,9 +205,9 @@ define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: shiftadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: adcq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: adcq %rcx, %rax ; CHECK-NEXT: retq entry: %0 = zext i64 %a to i128 @@ -225,23 +225,23 @@ define %S @readd(%S* nocapture readonly %this, %S %arg.b) { ; CHECK-LABEL: readd: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: movq 8(%rsi), %r10 -; CHECK-NEXT: adcq $0, %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: addq %rcx, %r10 -; CHECK-NEXT: adcq 16(%rsi), %rax +; CHECK-NEXT: movq 8(%rsi), %r11 +; CHECK-NEXT: adcq $0, %r11 +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %edi +; CHECK-NEXT: addq %rcx, %r11 +; CHECK-NEXT: adcq 16(%rsi), %rdi ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %r8, %rdi ; CHECK-NEXT: adcq 24(%rsi), %rcx ; CHECK-NEXT: addq %r9, %rcx -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %r10, 8(%rdi) -; CHECK-NEXT: movq %rax, 16(%rdi) -; CHECK-NEXT: movq %rcx, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %r11, 8(%rax) +; CHECK-NEXT: movq %rdi, 16(%rax) +; CHECK-NEXT: movq %rcx, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/and-encoding.ll =================================================================== --- test/CodeGen/X86/and-encoding.ll +++ test/CodeGen/X86/and-encoding.ll @@ -46,9 +46,9 @@ define i32 @lopped32_32to8(i32 %x) { ; CHECK-LABEL: lopped32_32to8: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $4, %edi # encoding: [0xc1,0xef,0x04] -; CHECK-NEXT: andl $-16, %edi # encoding: [0x83,0xe7,0xf0] ; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04] +; CHECK-NEXT: andl $-16, %eax # encoding: [0x83,0xe0,0xf0] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i32 %x, 4 %and = and i32 %shr, 268435440 @@ -60,9 +60,9 @@ define i64 @lopped64_32to8(i64 %x) { ; CHECK-LABEL: lopped64_32to8: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $36, %rdi # encoding: [0x48,0xc1,0xef,0x24] -; CHECK-NEXT: andl $-16, %edi # encoding: [0x83,0xe7,0xf0] ; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: shrq $36, %rax # encoding: [0x48,0xc1,0xe8,0x24] +; CHECK-NEXT: andl $-16, %eax # encoding: [0x83,0xe0,0xf0] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 36 %and = and i64 %shr, 268435440 @@ -74,9 +74,9 @@ define i64 @lopped64_64to8(i64 %x) { ; CHECK-LABEL: lopped64_64to8: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04] -; CHECK-NEXT: andq $-16, %rdi # encoding: [0x48,0x83,0xe7,0xf0] ; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: shrq $4, %rax # encoding: [0x48,0xc1,0xe8,0x04] +; CHECK-NEXT: andq $-16, %rax # encoding: [0x48,0x83,0xe0,0xf0] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 4 %and = and i64 %shr, 1152921504606846960 @@ -88,10 +88,10 @@ define i64 @lopped64_64to32(i64 %x) { ; CHECK-LABEL: lopped64_64to32: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04] -; CHECK-NEXT: andq $-983056, %rdi # encoding: [0x48,0x81,0xe7,0xf0,0xff,0xf0,0xff] -; CHECK-NEXT: # imm = 0xFFF0FFF0 ; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: shrq $4, %rax # encoding: [0x48,0xc1,0xe8,0x04] +; CHECK-NEXT: andq $-983056, %rax # encoding: [0x48,0x25,0xf0,0xff,0xf0,0xff] +; CHECK-NEXT: # imm = 0xFFF0FFF0 ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 4 %and = and i64 %shr, 1152921504605863920 Index: test/CodeGen/X86/andimm8.ll =================================================================== --- test/CodeGen/X86/andimm8.ll +++ test/CodeGen/X86/andimm8.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s ; PR8365 -; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0] +; CHECK: andl $-64, %eax # encoding: [0x83,0xe0,0xc0] define i64 @bra(i32 %zed) nounwind { %t1 = zext i32 %zed to i64 @@ -19,13 +19,13 @@ } define i64 @bar(i64 %zed) nounwind { -; CHECK: andl $42, %edi # encoding: [0x83,0xe7,0x2a] +; CHECK: andl $42, %eax # encoding: [0x83,0xe0,0x2a] %t1 = and i64 %zed, 42 ret i64 %t1 } define i64 @baz(i64 %zed) nounwind { -; CHECK: andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f] +; CHECK: andl $2147483647, %eax # encoding: [0x25,0xff,0xff,0xff,0x7f] %t1 = and i64 %zed, 2147483647 ret i64 %t1 } Index: test/CodeGen/X86/anyext.ll =================================================================== --- test/CodeGen/X86/anyext.ll +++ test/CodeGen/X86/anyext.ll @@ -41,8 +41,9 @@ ; ; X64-LABEL: bar: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: divw %si ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: andl $1, %eax Index: test/CodeGen/X86/apm.ll =================================================================== --- test/CodeGen/X86/apm.ll +++ test/CodeGen/X86/apm.ll @@ -3,8 +3,8 @@ ; PR8573 ; CHECK-LABEL: foo: -; CHECK: leaq (%rdi), %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK-DAG: leaq (%rdi), %rax +; CHECK-DAG: movl %esi, %ecx ; CHECK-NEXT: monitor ; WIN64-LABEL: foo: ; WIN64: leaq (%rcx), %rax @@ -20,8 +20,8 @@ declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind ; CHECK-LABEL: bar: -; CHECK: movl %edi, %ecx -; CHECK-NEXT: movl %esi, %eax +; CHECK-DAG: movl %edi, %ecx +; CHECK-DAG: movl %esi, %eax ; CHECK-NEXT: mwait ; WIN64-LABEL: bar: ; WIN64: movl %edx, %eax Index: test/CodeGen/X86/atomic-eflags-reuse.ll =================================================================== --- test/CodeGen/X86/atomic-eflags-reuse.ll +++ test/CodeGen/X86/atomic-eflags-reuse.ll @@ -5,16 +5,16 @@ define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_add_1_cmov_slt: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock incq (%rdi) -; FASTINCDEC-NEXT: cmovgl %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock incq (%rdi) +; FASTINCDEC-NEXT: cmovgl %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_add_1_cmov_slt: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $1, (%rdi) -; SLOWINCDEC-NEXT: cmovgl %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $1, (%rdi) +; SLOWINCDEC-NEXT: cmovgl %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -26,16 +26,16 @@ define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_add_1_cmov_sge: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock incq (%rdi) -; FASTINCDEC-NEXT: cmovlel %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock incq (%rdi) +; FASTINCDEC-NEXT: cmovlel %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_add_1_cmov_sge: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $1, (%rdi) -; SLOWINCDEC-NEXT: cmovlel %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $1, (%rdi) +; SLOWINCDEC-NEXT: cmovlel %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -47,16 +47,16 @@ define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_sub_1_cmov_sle: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock decq (%rdi) -; FASTINCDEC-NEXT: cmovgel %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock decq (%rdi) +; FASTINCDEC-NEXT: cmovgel %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_sub_1_cmov_sle: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) -; SLOWINCDEC-NEXT: cmovgel %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) +; SLOWINCDEC-NEXT: cmovgel %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -68,16 +68,16 @@ define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_sub_1_cmov_sgt: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock decq (%rdi) -; FASTINCDEC-NEXT: cmovll %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock decq (%rdi) +; FASTINCDEC-NEXT: cmovll %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_sub_1_cmov_sgt: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) -; SLOWINCDEC-NEXT: cmovll %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) +; SLOWINCDEC-NEXT: cmovll %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -159,11 +159,11 @@ define i32 @test_add_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sle: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovgl %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -175,11 +175,11 @@ define i32 @test_add_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sgt: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovlel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovlel %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -228,7 +228,19 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: movb $12, %al +; CHECK-NEXT: js .LBB11_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movb $34, %al +; CHECK-NEXT: .LBB11_2: # %entry +; CHECK-NEXT: movb %al, (%rsi) +; CHECK-NEXT: movb $56, %al +; CHECK-NEXT: js .LBB11_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movb $78, %al +; CHECK-NEXT: .LBB11_4: # %entry +; CHECK-NEXT: retq entry: %add = atomicrmw add i64* %p, i64 1 seq_cst %cmp = icmp slt i64 %add, 0 Index: test/CodeGen/X86/atomic128.ll =================================================================== --- test/CodeGen/X86/atomic128.ll +++ test/CodeGen/X86/atomic128.ll @@ -12,10 +12,9 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 +; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq Index: test/CodeGen/X86/avg.ll =================================================================== --- test/CodeGen/X86/avg.ll +++ test/CodeGen/X86/avg.ll @@ -1655,95 +1655,95 @@ define <512 x i8> @avg_v512i8_3(<512 x i8> %a, <512 x i8> %b) nounwind { ; SSE2-LABEL: avg_v512i8_3: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 496(%rdi) +; SSE2-NEXT: movdqa %xmm8, 496(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 480(%rdi) +; SSE2-NEXT: movdqa %xmm8, 480(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 464(%rdi) +; SSE2-NEXT: movdqa %xmm8, 464(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 448(%rdi) +; SSE2-NEXT: movdqa %xmm8, 448(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 432(%rdi) +; SSE2-NEXT: movdqa %xmm8, 432(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 416(%rdi) +; SSE2-NEXT: movdqa %xmm8, 416(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 400(%rdi) +; SSE2-NEXT: movdqa %xmm8, 400(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 384(%rdi) +; SSE2-NEXT: movdqa %xmm8, 384(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 368(%rdi) +; SSE2-NEXT: movdqa %xmm8, 368(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 352(%rdi) +; SSE2-NEXT: movdqa %xmm8, 352(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 336(%rdi) +; SSE2-NEXT: movdqa %xmm8, 336(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 320(%rdi) +; SSE2-NEXT: movdqa %xmm8, 320(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 304(%rdi) +; SSE2-NEXT: movdqa %xmm8, 304(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 288(%rdi) +; SSE2-NEXT: movdqa %xmm8, 288(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 272(%rdi) +; SSE2-NEXT: movdqa %xmm8, 272(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 256(%rdi) +; SSE2-NEXT: movdqa %xmm8, 256(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 240(%rdi) +; SSE2-NEXT: movdqa %xmm8, 240(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 224(%rdi) +; SSE2-NEXT: movdqa %xmm8, 224(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 208(%rdi) +; SSE2-NEXT: movdqa %xmm8, 208(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 192(%rdi) +; SSE2-NEXT: movdqa %xmm8, 192(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 176(%rdi) +; SSE2-NEXT: movdqa %xmm8, 176(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 160(%rdi) +; SSE2-NEXT: movdqa %xmm8, 160(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 144(%rdi) +; SSE2-NEXT: movdqa %xmm8, 144(%rax) ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 -; SSE2-NEXT: movdqa %xmm8, 128(%rdi) +; SSE2-NEXT: movdqa %xmm8, 128(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm7 -; SSE2-NEXT: movdqa %xmm7, 112(%rdi) +; SSE2-NEXT: movdqa %xmm7, 112(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm6 -; SSE2-NEXT: movdqa %xmm6, 96(%rdi) +; SSE2-NEXT: movdqa %xmm6, 96(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm5 -; SSE2-NEXT: movdqa %xmm5, 80(%rdi) +; SSE2-NEXT: movdqa %xmm5, 80(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm4 -; SSE2-NEXT: movdqa %xmm4, 64(%rdi) +; SSE2-NEXT: movdqa %xmm4, 64(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm3 -; SSE2-NEXT: movdqa %xmm3, 48(%rdi) +; SSE2-NEXT: movdqa %xmm3, 48(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm2 -; SSE2-NEXT: movdqa %xmm2, 32(%rdi) +; SSE2-NEXT: movdqa %xmm2, 32(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm1 -; SSE2-NEXT: movdqa %xmm1, 16(%rdi) +; SSE2-NEXT: movdqa %xmm1, 16(%rax) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm0 -; SSE2-NEXT: movdqa %xmm0, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movdqa %xmm0, (%rax) ; SSE2-NEXT: retq ; ; AVX1-LABEL: avg_v512i8_3: @@ -1752,6 +1752,7 @@ ; AVX1-NEXT: movq %rsp, %rbp ; AVX1-NEXT: andq $-32, %rsp ; AVX1-NEXT: subq $128, %rsp +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vmovdqa 144(%rbp), %ymm8 ; AVX1-NEXT: vmovdqa 112(%rbp), %ymm9 ; AVX1-NEXT: vmovdqa 80(%rbp), %ymm10 @@ -1859,26 +1860,25 @@ ; AVX1-NEXT: vpavgb %xmm2, %xmm14, %xmm2 ; AVX1-NEXT: vpavgb %xmm8, %xmm15, %xmm8 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm8, %ymm2 -; AVX1-NEXT: vmovaps %ymm2, 480(%rdi) -; AVX1-NEXT: vmovaps %ymm1, 448(%rdi) -; AVX1-NEXT: vmovaps %ymm13, 416(%rdi) -; AVX1-NEXT: vmovaps %ymm0, 384(%rdi) -; AVX1-NEXT: vmovaps %ymm9, 352(%rdi) -; AVX1-NEXT: vmovaps %ymm10, 320(%rdi) -; AVX1-NEXT: vmovaps %ymm11, 288(%rdi) -; AVX1-NEXT: vmovaps %ymm12, 256(%rdi) -; AVX1-NEXT: vmovaps %ymm7, 224(%rdi) -; AVX1-NEXT: vmovaps %ymm6, 192(%rdi) -; AVX1-NEXT: vmovaps %ymm5, 160(%rdi) -; AVX1-NEXT: vmovaps %ymm4, 128(%rdi) -; AVX1-NEXT: vmovaps %ymm3, 96(%rdi) +; AVX1-NEXT: vmovaps %ymm2, 480(%rax) +; AVX1-NEXT: vmovaps %ymm1, 448(%rax) +; AVX1-NEXT: vmovaps %ymm13, 416(%rax) +; AVX1-NEXT: vmovaps %ymm0, 384(%rax) +; AVX1-NEXT: vmovaps %ymm9, 352(%rax) +; AVX1-NEXT: vmovaps %ymm10, 320(%rax) +; AVX1-NEXT: vmovaps %ymm11, 288(%rax) +; AVX1-NEXT: vmovaps %ymm12, 256(%rax) +; AVX1-NEXT: vmovaps %ymm7, 224(%rax) +; AVX1-NEXT: vmovaps %ymm6, 192(%rax) +; AVX1-NEXT: vmovaps %ymm5, 160(%rax) +; AVX1-NEXT: vmovaps %ymm4, 128(%rax) +; AVX1-NEXT: vmovaps %ymm3, 96(%rax) ; AVX1-NEXT: vmovaps (%rsp), %ymm0 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm0, 64(%rdi) +; AVX1-NEXT: vmovaps %ymm0, 64(%rax) ; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm0, 32(%rdi) +; AVX1-NEXT: vmovaps %ymm0, 32(%rax) ; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm0, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vmovaps %ymm0, (%rax) ; AVX1-NEXT: movq %rbp, %rsp ; AVX1-NEXT: popq %rbp ; AVX1-NEXT: vzeroupper @@ -1890,6 +1890,7 @@ ; AVX2-NEXT: movq %rsp, %rbp ; AVX2-NEXT: andq $-32, %rsp ; AVX2-NEXT: subq $32, %rsp +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vmovdqa 240(%rbp), %ymm8 ; AVX2-NEXT: vmovdqa 208(%rbp), %ymm9 ; AVX2-NEXT: vmovdqa 176(%rbp), %ymm10 @@ -1914,23 +1915,22 @@ ; AVX2-NEXT: vpavgb 688(%rbp), %ymm10, %ymm10 ; AVX2-NEXT: vpavgb 720(%rbp), %ymm9, %ymm9 ; AVX2-NEXT: vpavgb 752(%rbp), %ymm8, %ymm8 -; AVX2-NEXT: vmovdqa %ymm8, 480(%rdi) -; AVX2-NEXT: vmovdqa %ymm9, 448(%rdi) -; AVX2-NEXT: vmovdqa %ymm10, 416(%rdi) -; AVX2-NEXT: vmovdqa %ymm11, 384(%rdi) -; AVX2-NEXT: vmovdqa %ymm12, 352(%rdi) -; AVX2-NEXT: vmovdqa %ymm13, 320(%rdi) -; AVX2-NEXT: vmovdqa %ymm14, 288(%rdi) -; AVX2-NEXT: vmovdqa %ymm15, 256(%rdi) -; AVX2-NEXT: vmovdqa %ymm7, 224(%rdi) -; AVX2-NEXT: vmovdqa %ymm6, 192(%rdi) -; AVX2-NEXT: vmovdqa %ymm5, 160(%rdi) -; AVX2-NEXT: vmovdqa %ymm4, 128(%rdi) -; AVX2-NEXT: vmovdqa %ymm3, 96(%rdi) -; AVX2-NEXT: vmovdqa %ymm2, 64(%rdi) -; AVX2-NEXT: vmovdqa %ymm1, 32(%rdi) -; AVX2-NEXT: vmovdqa %ymm0, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vmovdqa %ymm8, 480(%rax) +; AVX2-NEXT: vmovdqa %ymm9, 448(%rax) +; AVX2-NEXT: vmovdqa %ymm10, 416(%rax) +; AVX2-NEXT: vmovdqa %ymm11, 384(%rax) +; AVX2-NEXT: vmovdqa %ymm12, 352(%rax) +; AVX2-NEXT: vmovdqa %ymm13, 320(%rax) +; AVX2-NEXT: vmovdqa %ymm14, 288(%rax) +; AVX2-NEXT: vmovdqa %ymm15, 256(%rax) +; AVX2-NEXT: vmovdqa %ymm7, 224(%rax) +; AVX2-NEXT: vmovdqa %ymm6, 192(%rax) +; AVX2-NEXT: vmovdqa %ymm5, 160(%rax) +; AVX2-NEXT: vmovdqa %ymm4, 128(%rax) +; AVX2-NEXT: vmovdqa %ymm3, 96(%rax) +; AVX2-NEXT: vmovdqa %ymm2, 64(%rax) +; AVX2-NEXT: vmovdqa %ymm1, 32(%rax) +; AVX2-NEXT: vmovdqa %ymm0, (%rax) ; AVX2-NEXT: movq %rbp, %rsp ; AVX2-NEXT: popq %rbp ; AVX2-NEXT: vzeroupper @@ -1942,6 +1942,7 @@ ; AVX512F-NEXT: movq %rsp, %rbp ; AVX512F-NEXT: andq $-32, %rsp ; AVX512F-NEXT: subq $32, %rsp +; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vmovdqa 240(%rbp), %ymm8 ; AVX512F-NEXT: vmovdqa 208(%rbp), %ymm9 ; AVX512F-NEXT: vmovdqa 176(%rbp), %ymm10 @@ -1966,23 +1967,22 @@ ; AVX512F-NEXT: vpavgb 688(%rbp), %ymm10, %ymm10 ; AVX512F-NEXT: vpavgb 720(%rbp), %ymm9, %ymm9 ; AVX512F-NEXT: vpavgb 752(%rbp), %ymm8, %ymm8 -; AVX512F-NEXT: vmovdqa %ymm8, 480(%rdi) -; AVX512F-NEXT: vmovdqa %ymm9, 448(%rdi) -; AVX512F-NEXT: vmovdqa %ymm10, 416(%rdi) -; AVX512F-NEXT: vmovdqa %ymm11, 384(%rdi) -; AVX512F-NEXT: vmovdqa %ymm12, 352(%rdi) -; AVX512F-NEXT: vmovdqa %ymm13, 320(%rdi) -; AVX512F-NEXT: vmovdqa %ymm14, 288(%rdi) -; AVX512F-NEXT: vmovdqa %ymm15, 256(%rdi) -; AVX512F-NEXT: vmovdqa %ymm7, 224(%rdi) -; AVX512F-NEXT: vmovdqa %ymm6, 192(%rdi) -; AVX512F-NEXT: vmovdqa %ymm5, 160(%rdi) -; AVX512F-NEXT: vmovdqa %ymm4, 128(%rdi) -; AVX512F-NEXT: vmovdqa %ymm3, 96(%rdi) -; AVX512F-NEXT: vmovdqa %ymm2, 64(%rdi) -; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdi) -; AVX512F-NEXT: vmovdqa %ymm0, (%rdi) -; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: vmovdqa %ymm8, 480(%rax) +; AVX512F-NEXT: vmovdqa %ymm9, 448(%rax) +; AVX512F-NEXT: vmovdqa %ymm10, 416(%rax) +; AVX512F-NEXT: vmovdqa %ymm11, 384(%rax) +; AVX512F-NEXT: vmovdqa %ymm12, 352(%rax) +; AVX512F-NEXT: vmovdqa %ymm13, 320(%rax) +; AVX512F-NEXT: vmovdqa %ymm14, 288(%rax) +; AVX512F-NEXT: vmovdqa %ymm15, 256(%rax) +; AVX512F-NEXT: vmovdqa %ymm7, 224(%rax) +; AVX512F-NEXT: vmovdqa %ymm6, 192(%rax) +; AVX512F-NEXT: vmovdqa %ymm5, 160(%rax) +; AVX512F-NEXT: vmovdqa %ymm4, 128(%rax) +; AVX512F-NEXT: vmovdqa %ymm3, 96(%rax) +; AVX512F-NEXT: vmovdqa %ymm2, 64(%rax) +; AVX512F-NEXT: vmovdqa %ymm1, 32(%rax) +; AVX512F-NEXT: vmovdqa %ymm0, (%rax) ; AVX512F-NEXT: movq %rbp, %rsp ; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper @@ -1994,6 +1994,7 @@ ; AVX512BW-NEXT: movq %rsp, %rbp ; AVX512BW-NEXT: andq $-64, %rsp ; AVX512BW-NEXT: subq $64, %rsp +; AVX512BW-NEXT: movq %rdi, %rax ; AVX512BW-NEXT: vpavgb 16(%rbp), %zmm0, %zmm0 ; AVX512BW-NEXT: vpavgb 80(%rbp), %zmm1, %zmm1 ; AVX512BW-NEXT: vpavgb 144(%rbp), %zmm2, %zmm2 @@ -2002,15 +2003,14 @@ ; AVX512BW-NEXT: vpavgb 336(%rbp), %zmm5, %zmm5 ; AVX512BW-NEXT: vpavgb 400(%rbp), %zmm6, %zmm6 ; AVX512BW-NEXT: vpavgb 464(%rbp), %zmm7, %zmm7 -; AVX512BW-NEXT: vmovdqa64 %zmm7, 448(%rdi) -; AVX512BW-NEXT: vmovdqa64 %zmm6, 384(%rdi) -; AVX512BW-NEXT: vmovdqa64 %zmm5, 320(%rdi) -; AVX512BW-NEXT: vmovdqa64 %zmm4, 256(%rdi) -; AVX512BW-NEXT: vmovdqa64 %zmm3, 192(%rdi) -; AVX512BW-NEXT: vmovdqa64 %zmm2, 128(%rdi) -; AVX512BW-NEXT: vmovdqa64 %zmm1, 64(%rdi) -; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdi) -; AVX512BW-NEXT: movq %rdi, %rax +; AVX512BW-NEXT: vmovdqa64 %zmm7, 448(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm6, 384(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm5, 320(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm4, 256(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm3, 192(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm2, 128(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm1, 64(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rax) ; AVX512BW-NEXT: movq %rbp, %rsp ; AVX512BW-NEXT: popq %rbp ; AVX512BW-NEXT: vzeroupper Index: test/CodeGen/X86/avx-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx-intel-ocl.ll +++ test/CodeGen/X86/avx-intel-ocl.ll @@ -122,8 +122,8 @@ ; pass parameters in registers for 64-bit platform ; X64-LABEL: test_int -; X64: leal {{.*}}, %edi ; X64: movl {{.*}}, %esi +; X64: leal {{.*}}, %edi ; X64: call ; X64: addl {{.*}}, %eax define i32 @test_int(i32 %a, i32 %b) nounwind { Index: test/CodeGen/X86/avx-vinsertf128.ll =================================================================== --- test/CodeGen/X86/avx-vinsertf128.ll +++ test/CodeGen/X86/avx-vinsertf128.ll @@ -75,8 +75,7 @@ define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: insert_undef_pd: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0) ret <4 x double> %res @@ -86,8 +85,7 @@ define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: insert_undef_ps: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0) ret <8 x float> %res @@ -97,8 +95,7 @@ define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: insert_undef_si: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0) ret <8 x i32> %res Index: test/CodeGen/X86/avx512-arith.ll =================================================================== --- test/CodeGen/X86/avx512-arith.ll +++ test/CodeGen/X86/avx512-arith.ll @@ -904,9 +904,9 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_broadcast_vaddpd: ; CHECK: # %bb.0: -; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1 -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 +; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1 +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} ; CHECK-NEXT: retq double* %j, <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -272,9 +272,9 @@ define i32 @test10(i32 %a, i32 %b, i1 %cond) { ; ALL_X64-LABEL: test10: ; ALL_X64: ## %bb.0: -; ALL_X64-NEXT: testb $1, %dl -; ALL_X64-NEXT: cmovel %esi, %edi ; ALL_X64-NEXT: movl %edi, %eax +; ALL_X64-NEXT: testb $1, %dl +; ALL_X64-NEXT: cmovel %esi, %eax ; ALL_X64-NEXT: retq ; ; KNL_X32-LABEL: test10: Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -195,21 +195,21 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test12: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi ; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test12: ; SKX: ## %bb.0: -; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi ; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <16 x i64> %a, %b @@ -257,23 +257,23 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test14: ; KNL: ## %bb.0: +; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; KNL-NEXT: kshiftrw $4, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi -; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test14: ; SKX: ## %bb.0: +; SKX-NEXT: movq %rdi, %rax ; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; SKX-NEXT: kshiftrb $4, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi -; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <8 x i64> %a, %b Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3741,9 +3741,10 @@ define i16 @test_kand(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kand: ; CHECK: ## %bb.0: -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: andl $8, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: andl $8, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) @@ -3754,9 +3755,10 @@ define i16 @test_kandn(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kandn: ; CHECK: ## %bb.0: -; CHECK-NEXT: orl $-9, %edi -; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl $-9, %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1) @@ -3767,8 +3769,9 @@ define i16 @test_knot(i16 %a0) { ; CHECK-LABEL: test_knot: ; CHECK: ## %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) ret i16 %res @@ -3778,9 +3781,10 @@ define i16 @test_kor(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kor: ; CHECK: ## %bb.0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: orl $8, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: orl $8, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1) @@ -3793,9 +3797,10 @@ define i16 @test_kxnor(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kxnor: ; CHECK: ## %bb.0: -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: xorl $8, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: xorl $8, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1) @@ -3806,9 +3811,10 @@ define i16 @test_kxor(i16 %a0, i16 %a1) { ; CHECK-LABEL: test_kxor: ; CHECK: ## %bb.0: -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: xorl $8, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: xorl $8, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1) Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -8,8 +8,9 @@ define i16 @mask16(i16 %x) { ; CHECK-LABEL: mask16: ; CHECK: ## %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, @@ -33,8 +34,9 @@ define i8 @mask8(i8 %x) { ; CHECK-LABEL: mask8: ; CHECK: ## %bb.0: -; CHECK-NEXT: notb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notb %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, @@ -106,10 +108,11 @@ ; CHECK-LABEL: mand16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> Index: test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -907,47 +907,46 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: subl $20, %esp ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: subl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: imull %ebp, %edx -; X32-NEXT: subl %esi, %ebx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: subl %ecx, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ecx +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: imull %ebp, %ebx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: subl %edi, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: imull %ebp, %ecx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %ebp ; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax ; X32-NEXT: imull %ebp, %eax -; X32-NEXT: addl %eax, %edx +; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl (%esp), %ebp # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi -; X32-NEXT: imull %eax, %edi ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: imull %eax, %esi +; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: imull %ebp, %edx +; X32-NEXT: addl %esi, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl %edx, %eax +; X32-NEXT: imull %edi, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: addl $20, %esp ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -947,16 +947,16 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [7:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_broadcast_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] +; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -6671,14 +6671,16 @@ define i16 @mask16(i16 %x) { ; GENERIC-LABEL: mask16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: notl %edi # sched: [1:0.33] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: notl %eax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mask16: ; SKX: # %bb.0: -; SKX-NEXT: notl %edi # sched: [1:0.25] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: notl %eax # sched: [1:0.25] +; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq # sched: [7:1.00] %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, @@ -6708,14 +6710,16 @@ define i8 @mask8(i8 %x) { ; GENERIC-LABEL: mask8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: notb %dil # sched: [1:0.33] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: notb %al # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $al killed $al killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mask8: ; SKX: # %bb.0: -; SKX-NEXT: notb %dil # sched: [1:0.25] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: notb %al # sched: [1:0.25] +; SKX-NEXT: # kill: def $al killed $al killed $eax ; SKX-NEXT: retq # sched: [7:1.00] %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, @@ -6790,19 +6794,21 @@ ; GENERIC-LABEL: mand16: ; GENERIC: # %bb.0: ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33] -; GENERIC-NEXT: andl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: orl %eax, %edi # sched: [1:0.33] -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %eax, %ecx # sched: [1:0.33] +; GENERIC-NEXT: xorl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: andl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mand16: ; SKX: # %bb.0: ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25] -; SKX-NEXT: andl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: orl %eax, %edi # sched: [1:0.25] -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: movl %eax, %ecx # sched: [1:0.25] +; SKX-NEXT: xorl %esi, %ecx # sched: [1:0.25] +; SKX-NEXT: andl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq # sched: [7:1.00] %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> Index: test/CodeGen/X86/avx512-select.ll =================================================================== --- test/CodeGen/X86/avx512-select.ll +++ test/CodeGen/X86/avx512-select.ll @@ -134,8 +134,9 @@ ; ; X64-LABEL: select05: ; X64: # %bb.0: -; X64-NEXT: orl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> @@ -184,8 +185,9 @@ ; ; X64-LABEL: select06: ; X64: # %bb.0: -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> Index: test/CodeGen/X86/avx512bw-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512bw-mask-op.ll +++ test/CodeGen/X86/avx512bw-mask-op.ll @@ -4,8 +4,8 @@ define i32 @mask32(i32 %x) { ; CHECK-LABEL: mask32: ; CHECK: ## %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax ; CHECK-NEXT: retq %m0 = bitcast i32 %x to <32 x i1> %m1 = xor <32 x i1> %m0, %m1 = xor <64 x i1> %m0, %mb = bitcast i32 %y to <32 x i1> @@ -116,10 +116,10 @@ ; CHECK-LABEL: mand64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: andq %rsi, %rax -; CHECK-NEXT: xorq %rsi, %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: xorq %rsi, %rax +; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq %ma = bitcast i64 %x to <64 x i1> %mb = bitcast i64 %y to <64 x i1> Index: test/CodeGen/X86/avx512dq-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512dq-mask-op.ll +++ test/CodeGen/X86/avx512dq-mask-op.ll @@ -4,8 +4,9 @@ define i8 @mask8(i8 %x) { ; CHECK-LABEL: mask8: ; CHECK: ## %bb.0: -; CHECK-NEXT: notb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notb %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, @@ -32,10 +33,11 @@ ; CHECK-LABEL: mand8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %ma = bitcast i8 %x to <8 x i1> %mb = bitcast i8 %y to <8 x i1> Index: test/CodeGen/X86/avx512vl-arith.ll =================================================================== --- test/CodeGen/X86/avx512vl-arith.ll +++ test/CodeGen/X86/avx512vl-arith.ll @@ -408,9 +408,9 @@ define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_256: ; CHECK: ## %bb.0: -; CHECK-NEXT: vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca] -; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm1, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x0f] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] +; CHECK-NEXT: vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca] +; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -835,9 +835,9 @@ define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, double* %j, <2 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_128: ; CHECK: ## %bb.0: -; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca] -; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm1, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x0f] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] +; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca] +; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %tmp = load double, double* %j Index: test/CodeGen/X86/bigstructret.ll =================================================================== --- test/CodeGen/X86/bigstructret.ll +++ test/CodeGen/X86/bigstructret.ll @@ -8,20 +8,20 @@ define fastcc %0 @ReturnBigStruct() nounwind readnone { ; X86-LABEL: ReturnBigStruct: ; X86: # %bb.0: # %entry -; X86-NEXT: movl $24601, 12(%ecx) # imm = 0x6019 -; X86-NEXT: movl $48, 8(%ecx) -; X86-NEXT: movl $24, 4(%ecx) -; X86-NEXT: movl $12, (%ecx) ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl $24601, 12(%eax) # imm = 0x6019 +; X86-NEXT: movl $48, 8(%eax) +; X86-NEXT: movl $24, 4(%eax) +; X86-NEXT: movl $12, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $105660490448944, %rax # imm = 0x601900000030 -; X64-NEXT: movq %rax, 8(%rdi) -; X64-NEXT: movabsq $103079215116, %rax # imm = 0x180000000C -; X64-NEXT: movq %rax, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $105660490448944, %rcx # imm = 0x601900000030 +; X64-NEXT: movq %rcx, 8(%rax) +; X64-NEXT: movabsq $103079215116, %rcx # imm = 0x180000000C +; X64-NEXT: movq %rcx, (%rax) ; X64-NEXT: retq entry: %0 = insertvalue %0 zeroinitializer, i32 12, 0 @@ -35,18 +35,18 @@ define fastcc %1 @ReturnBigStruct2() nounwind readnone { ; X86-LABEL: ReturnBigStruct2: ; X86: # %bb.0: # %entry -; X86-NEXT: movl $48, 4(%ecx) -; X86-NEXT: movb $1, 2(%ecx) -; X86-NEXT: movw $256, (%ecx) # imm = 0x100 ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl $48, 4(%eax) +; X86-NEXT: movb $1, 2(%eax) +; X86-NEXT: movw $256, (%eax) # imm = 0x100 ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct2: ; X64: # %bb.0: # %entry -; X64-NEXT: movl $48, 4(%rdi) -; X64-NEXT: movb $1, 2(%rdi) -; X64-NEXT: movw $256, (%rdi) # imm = 0x100 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl $48, 4(%rax) +; X64-NEXT: movb $1, 2(%rax) +; X64-NEXT: movw $256, (%rax) # imm = 0x100 ; X64-NEXT: retq entry: %0 = insertvalue %1 zeroinitializer, i1 false, 0 Index: test/CodeGen/X86/bitcast-i256.ll =================================================================== --- test/CodeGen/X86/bitcast-i256.ll +++ test/CodeGen/X86/bitcast-i256.ll @@ -5,16 +5,16 @@ define i256 @foo(<8 x i32> %a) { ; FAST-LABEL: foo: ; FAST: # %bb.0: -; FAST-NEXT: vmovups %ymm0, (%rdi) ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: vmovups %ymm0, (%rax) ; FAST-NEXT: vzeroupper ; FAST-NEXT: retq ; ; SLOW-LABEL: foo: ; SLOW: # %bb.0: -; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi) -; SLOW-NEXT: vmovups %xmm0, (%rdi) ; SLOW-NEXT: movq %rdi, %rax +; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rax) +; SLOW-NEXT: vmovups %xmm0, (%rax) ; SLOW-NEXT: vzeroupper ; SLOW-NEXT: retq %r = bitcast <8 x i32> %a to i256 Index: test/CodeGen/X86/bitcast-int-to-vector-bool.ll =================================================================== --- test/CodeGen/X86/bitcast-int-to-vector-bool.ll +++ test/CodeGen/X86/bitcast-int-to-vector-bool.ll @@ -193,8 +193,8 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i32_32i1: ; SSE2-SSSE3: # %bb.0: -; SSE2-SSSE3-NEXT: movl %esi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movl %esi, (%rax) ; SSE2-SSSE3-NEXT: retq ; ; AVX1-LABEL: bitcast_i32_32i1: @@ -250,14 +250,14 @@ define <64 x i1> @bitcast_i64_64i1(i64 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i64_64i1: ; SSE2-SSSE3: # %bb.0: -; SSE2-SSSE3-NEXT: movq %rsi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movq %rsi, (%rax) ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: bitcast_i64_64i1: ; AVX12: # %bb.0: -; AVX12-NEXT: movq %rsi, (%rdi) ; AVX12-NEXT: movq %rdi, %rax +; AVX12-NEXT: movq %rsi, (%rax) ; AVX12-NEXT: retq ; ; AVX512-LABEL: bitcast_i64_64i1: Index: test/CodeGen/X86/bitreverse.ll =================================================================== --- test/CodeGen/X86/bitreverse.ll +++ test/CodeGen/X86/bitreverse.ll @@ -341,20 +341,21 @@ ; ; X64-LABEL: test_bitreverse_i8: ; X64: # %bb.0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $85, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-86, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $85, %cl +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-86, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -384,21 +385,22 @@ ; ; X64-LABEL: test_bitreverse_i4: ; X64: # %bb.0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $80, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-96, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: shrb $4, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $80, %cl +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-96, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: shrb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b @@ -474,6 +476,7 @@ ; X64-LABEL: identity_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) %c = call i8 @llvm.bitreverse.i8(i8 %b) Index: test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll @@ -10,9 +10,9 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test__andn_u64: ; X64: # %bb.0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 @@ -84,9 +84,9 @@ define i64 @test_andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test_andn_u64: ; X64: # %bb.0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 Index: test/CodeGen/X86/bmi-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -47,9 +47,9 @@ ; ; X64-LABEL: test__andn_u32: ; X64: # %bb.0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 @@ -199,9 +199,9 @@ ; ; X64-LABEL: test_andn_u32: ; X64: # %bb.0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 Index: test/CodeGen/X86/bmi.ll =================================================================== --- test/CodeGen/X86/bmi.ll +++ test/CodeGen/X86/bmi.ll @@ -420,9 +420,9 @@ define i32 @non_bextr32(i32 %x) { ; CHECK-LABEL: non_bextr32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shrl $2, %edi -; CHECK-NEXT: andl $111, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: andl $111, %eax ; CHECK-NEXT: retq entry: %shr = lshr i32 %x, 2 @@ -446,8 +446,9 @@ define i32 @bzhi32b(i32 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi32b: ; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl %edi, %eax @@ -468,8 +469,9 @@ define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) { ; BMI1-LABEL: bzhi32b_load: ; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl (%rdi), %eax @@ -491,8 +493,9 @@ define i32 @bzhi32c(i32 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi32c: ; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx ; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: decl %eax ; BMI1-NEXT: andl %edi, %eax @@ -535,12 +538,12 @@ define i32 @bzhi32e(i32 %a, i32 %b) { ; BMI1-LABEL: bzhi32e: ; BMI1: # %bb.0: # %entry +; BMI1-NEXT: movl %edi, %eax ; BMI1-NEXT: movl $32, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shll %cl, %edi +; BMI1-NEXT: shll %cl, %eax ; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrl %cl, %edi -; BMI1-NEXT: movl %edi, %eax +; BMI1-NEXT: shrl %cl, %eax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi32e: @@ -557,8 +560,9 @@ define i64 @bzhi64b(i64 %x, i8 zeroext %index) { ; BMI1-LABEL: bzhi64b: ; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax ; BMI1-NEXT: movl %esi, %ecx +; BMI1-NEXT: movl $1, %eax +; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx ; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: decq %rax ; BMI1-NEXT: andq %rdi, %rax @@ -626,12 +630,12 @@ define i64 @bzhi64e(i64 %a, i64 %b) { ; BMI1-LABEL: bzhi64e: ; BMI1: # %bb.0: # %entry +; BMI1-NEXT: movq %rdi, %rax ; BMI1-NEXT: movl $64, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi +; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax +; BMI1-NEXT: shrq %cl, %rax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi64e: @@ -648,12 +652,12 @@ define i64 @bzhi64f(i64 %a, i32 %b) { ; BMI1-LABEL: bzhi64f: ; BMI1: # %bb.0: # %entry +; BMI1-NEXT: movq %rdi, %rax ; BMI1-NEXT: movl $64, %ecx ; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi +; BMI1-NEXT: shlq %cl, %rax ; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax +; BMI1-NEXT: shrq %cl, %rax ; BMI1-NEXT: retq ; ; BMI2-LABEL: bzhi64f: @@ -707,8 +711,8 @@ define i64 @bzhi64_small_constant_mask(i64 %x) { ; CHECK-LABEL: bzhi64_small_constant_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: retq entry: %and = and i64 %x, 2147483647 Index: test/CodeGen/X86/bool-simplify.ll =================================================================== --- test/CodeGen/X86/bool-simplify.ll +++ test/CodeGen/X86/bool-simplify.ll @@ -4,9 +4,9 @@ define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: ptest %xmm0, %xmm0 -; CHECK-NEXT: cmovnel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ptest %xmm0, %xmm0 +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) %t2 = icmp ne i32 %t1, 0 Index: test/CodeGen/X86/bswap-rotate.ll =================================================================== --- test/CodeGen/X86/bswap-rotate.ll +++ test/CodeGen/X86/bswap-rotate.ll @@ -14,8 +14,9 @@ ; ; X64-LABEL: combine_bswap_rotate: ; X64: # %bb.0: -; X64-NEXT: rolw $9, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $9, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %1 = call i16 @llvm.bswap.i16(i16 %a0) %2 = shl i16 %1, 1 Index: test/CodeGen/X86/bswap-wide-int.ll =================================================================== --- test/CodeGen/X86/bswap-wide-int.ll +++ test/CodeGen/X86/bswap-wide-int.ll @@ -25,14 +25,14 @@ ; ; X64-LABEL: bswap_i64: ; X64: # %bb.0: -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: bswapq %rax ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i64: ; X64-MOVBE: # %bb.0: -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rax +; X64-MOVBE-NEXT: bswapq %rax ; X64-MOVBE-NEXT: retq %1 = call i64 @llvm.bswap.i64(i64 %a0) ret i64 %1 @@ -79,17 +79,17 @@ ; ; X64-LABEL: bswap_i128: ; X64: # %bb.0: -; X64-NEXT: bswapq %rsi -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rdx ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i128: ; X64-MOVBE: # %bb.0: -; X64-MOVBE-NEXT: bswapq %rsi -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rsi, %rax +; X64-MOVBE-NEXT: bswapq %rax +; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rdx ; X64-MOVBE-NEXT: retq %1 = call i128 @llvm.bswap.i128(i128 %a0) @@ -149,24 +149,24 @@ ; ; X64-LABEL: bswap_i256: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: bswapq %r8 ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx ; X64-NEXT: bswapq %rsi -; X64-NEXT: movq %rsi, 24(%rdi) -; X64-NEXT: movq %rdx, 16(%rdi) -; X64-NEXT: movq %rcx, 8(%rdi) -; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, 24(%rax) +; X64-NEXT: movq %rdx, 16(%rax) +; X64-NEXT: movq %rcx, 8(%rax) +; X64-NEXT: movq %r8, (%rax) ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i256: ; X64-MOVBE: # %bb.0: -; X64-MOVBE-NEXT: movbeq %rsi, 24(%rdi) -; X64-MOVBE-NEXT: movbeq %rdx, 16(%rdi) -; X64-MOVBE-NEXT: movbeq %rcx, 8(%rdi) -; X64-MOVBE-NEXT: movbeq %r8, (%rdi) ; X64-MOVBE-NEXT: movq %rdi, %rax +; X64-MOVBE-NEXT: movbeq %rsi, 24(%rax) +; X64-MOVBE-NEXT: movbeq %rdx, 16(%rax) +; X64-MOVBE-NEXT: movbeq %rcx, 8(%rax) +; X64-MOVBE-NEXT: movbeq %r8, (%rax) ; X64-MOVBE-NEXT: retq %1 = call i256 @llvm.bswap.i256(i256 %a0) ret i256 %1 Index: test/CodeGen/X86/bswap.ll =================================================================== --- test/CodeGen/X86/bswap.ll +++ test/CodeGen/X86/bswap.ll @@ -19,8 +19,9 @@ ; ; CHECK64-LABEL: W: ; CHECK64: # %bb.0: -; CHECK64-NEXT: rolw $8, %di ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: rolw $8, %ax +; CHECK64-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK64-NEXT: retq %Z = call i16 @llvm.bswap.i16( i16 %A ) ; [#uses=1] ret i16 %Z @@ -35,8 +36,8 @@ ; ; CHECK64-LABEL: X: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapl %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax ; CHECK64-NEXT: retq %Z = call i32 @llvm.bswap.i32( i32 %A ) ; [#uses=1] ret i32 %Z @@ -53,8 +54,8 @@ ; ; CHECK64-LABEL: Y: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapq %rdi ; CHECK64-NEXT: movq %rdi, %rax +; CHECK64-NEXT: bswapq %rax ; CHECK64-NEXT: retq %Z = call i64 @llvm.bswap.i64( i64 %A ) ; [#uses=1] ret i64 %Z @@ -71,9 +72,9 @@ ; ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: shrl $16, %eax ; CHECK64-NEXT: retq entry: @@ -95,9 +96,9 @@ ; ; CHECK64-LABEL: test2: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: sarl $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: sarl $16, %eax ; CHECK64-NEXT: retq entry: Index: test/CodeGen/X86/bswap_tree.ll =================================================================== --- test/CodeGen/X86/bswap_tree.ll +++ test/CodeGen/X86/bswap_tree.ll @@ -20,9 +20,9 @@ ; ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 @@ -53,9 +53,9 @@ ; ; CHECK64-LABEL: test2: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte1 = shl i32 %x, 8 %byte0 = lshr i32 %x, 8 Index: test/CodeGen/X86/bswap_tree2.ll =================================================================== --- test/CodeGen/X86/bswap_tree2.ll +++ test/CodeGen/X86/bswap_tree2.ll @@ -25,16 +25,16 @@ ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: ; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: orl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %eax -; CHECK64-NEXT: shrl $8, %ecx -; CHECK64-NEXT: orl %eax, %ecx -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %ecx, %edi -; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: movl %eax, %ecx +; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000 +; CHECK64-NEXT: movl %eax, %edx +; CHECK64-NEXT: orl $-16777216, %edx # imm = 0xFF000000 +; CHECK64-NEXT: shll $8, %ecx +; CHECK64-NEXT: shrl $8, %edx +; CHECK64-NEXT: orl %ecx, %edx +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: shrl $16, %eax +; CHECK64-NEXT: orl %edx, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 Index: test/CodeGen/X86/bt.ll =================================================================== --- test/CodeGen/X86/bt.ll +++ test/CodeGen/X86/bt.ll @@ -1112,16 +1112,16 @@ ; ; X64-LABEL: demanded_i32: ; X64: # %bb.0: -; X64-NEXT: movl %edx, %eax -; X64-NEXT: shrl $5, %eax -; X64-NEXT: movl (%rdi,%rax,4), %r8d -; X64-NEXT: movl $1, %edi ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: btl %edx, %r8d +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: shrl $5, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edi +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: btl %ecx, %edi ; X64-NEXT: jae .LBB30_2 ; X64-NEXT: # %bb.1: -; X64-NEXT: orl %edi, (%rsi,%rax,4) +; X64-NEXT: orl %edx, (%rsi,%rax,4) ; X64-NEXT: .LBB30_2: ; X64-NEXT: retq %4 = lshr i32 %2, 5 Index: test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-64.ll +++ test/CodeGen/X86/bypass-slow-division-64.ll @@ -8,17 +8,17 @@ ; CHECK-LABEL: Test_get_quotient: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-NEXT: retq @@ -30,21 +30,20 @@ ; CHECK-LABEL: Test_get_remainder: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB1_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: divl %esi -; CHECK-NEXT: # kill: def $edx killed $edx def $rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %result = srem i64 %a, %b ret i64 %result @@ -54,18 +53,18 @@ ; CHECK-LABEL: Test_get_quotient_and_remainder: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB2_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: # kill: def $eax killed $eax def $rax Index: test/CodeGen/X86/cmov-into-branch.ll =================================================================== --- test/CodeGen/X86/cmov-into-branch.ll +++ test/CodeGen/X86/cmov-into-branch.ll @@ -5,9 +5,9 @@ define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: ucomisd (%rdi), %xmm0 -; CHECK-NEXT: cmovbel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: ucomisd (%rdi), %xmm0 +; CHECK-NEXT: cmovbel %edx, %eax ; CHECK-NEXT: retq %load = load double, double* %b, align 8 %cmp = fcmp olt double %load, %a @@ -19,9 +19,9 @@ define i32 @test2(double %a, double %b, i32 %x, i32 %y) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovbel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: retq %cmp = fcmp ogt double %a, %b %cond = select i1 %cmp, i32 %x, i32 %y @@ -48,10 +48,10 @@ define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %edi, (%rsi) -; CHECK-NEXT: cmoval %edi, %ecx -; CHECK-NEXT: cmovael %edx, %ecx ; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: cmpl %edi, (%rsi) +; CHECK-NEXT: cmoval %edi, %eax +; CHECK-NEXT: cmovael %edx, %eax ; CHECK-NEXT: retq %load = load i32, i32* %b, align 4 %cmp = icmp ult i32 %load, %a @@ -83,9 +83,9 @@ define i32 @weighted_select1(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select1: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 @@ -96,12 +96,12 @@ define i32 @weighted_select2(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select2: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne .LBB6_2 ; CHECK-NEXT: # %bb.1: # %select.false -; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: .LBB6_2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !1 @@ -115,14 +115,13 @@ define i32 @weighted_select3(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select3: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je .LBB7_1 ; CHECK-NEXT: # %bb.2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB7_1: # %select.false -; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2 @@ -133,9 +132,9 @@ define i32 @unweighted_select(i32 %a, i32 %b) { ; CHECK-LABEL: unweighted_select: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !3 Index: test/CodeGen/X86/cmov.ll =================================================================== --- test/CodeGen/X86/cmov.ll +++ test/CodeGen/X86/cmov.ll @@ -194,12 +194,13 @@ define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: jne .LBB6_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %d = select i1 %c, i8 %a, i8 %b ret i8 %d Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -9,10 +9,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i32: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovnel %esi, %edi -; CMOV-NEXT: cmovpl %esi, %edi ; CMOV-NEXT: movl %edi, %eax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovnel %esi, %eax +; CMOV-NEXT: cmovpl %esi, %eax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) @@ -36,10 +36,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rsi, %rdi -; CMOV-NEXT: cmovpq %rsi, %rdi ; CMOV-NEXT: movq %rdi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rsi, %rax +; CMOV-NEXT: cmovpq %rsi, %rax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) @@ -64,10 +64,10 @@ ; CHECK-LABEL: test_select_fcmp_une_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rdi, %rsi -; CMOV-NEXT: cmovpq %rdi, %rsi ; CMOV-NEXT: movq %rsi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rdi, %rax +; CMOV-NEXT: cmovpq %rdi, %rax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) Index: test/CodeGen/X86/cmp.ll =================================================================== --- test/CodeGen/X86/cmp.ll +++ test/CodeGen/X86/cmp.ll @@ -268,9 +268,9 @@ define i32 @test13(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] -; CHECK-NEXT: cmovnel %edx, %esi # encoding: [0x0f,0x45,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] +; CHECK-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %mask, 8 @@ -283,9 +283,9 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] -; CHECK-NEXT: cmovnsl %edx, %esi # encoding: [0x0f,0x49,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] +; CHECK-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %s = lshr i32 %mask, 7 Index: test/CodeGen/X86/combine-add.ll =================================================================== --- test/CodeGen/X86/combine-add.ll +++ test/CodeGen/X86/combine-add.ll @@ -103,8 +103,8 @@ define <4 x i32> @combine_vec_add_sub_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add0: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add0: @@ -121,8 +121,8 @@ define <4 x i32> @combine_vec_add_sub_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add1: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add1: @@ -139,8 +139,8 @@ define <4 x i32> @combine_vec_add_sub_add2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add2: ; SSE: # %bb.0: -; SSE-NEXT: paddd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: paddd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add2: @@ -157,8 +157,8 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add3: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add3: @@ -203,9 +203,9 @@ ; ; AVX-LABEL: combine_vec_add_uniquebits: ; AVX: # %bb.0: -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680] ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855] ; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq Index: test/CodeGen/X86/combine-sdiv.ll =================================================================== --- test/CodeGen/X86/combine-sdiv.ll +++ test/CodeGen/X86/combine-sdiv.ll @@ -28,8 +28,8 @@ define i32 @combine_sdiv_by_negone(i32 %x) { ; CHECK-LABEL: combine_sdiv_by_negone: ; CHECK: # %bb.0: -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %1 = sdiv i32 %x, -1 ret i32 %1 Index: test/CodeGen/X86/combine-udiv.ll =================================================================== --- test/CodeGen/X86/combine-udiv.ll +++ test/CodeGen/X86/combine-udiv.ll @@ -25,9 +25,9 @@ define i32 @combine_udiv_dupe(i32 %x) { ; CHECK-LABEL: combine_udiv_dupe: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: divl %edi +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %eax ; CHECK-NEXT: retq %1 = udiv i32 %x, %x ret i32 %1 Index: test/CodeGen/X86/combine-urem.ll =================================================================== --- test/CodeGen/X86/combine-urem.ll +++ test/CodeGen/X86/combine-urem.ll @@ -31,9 +31,9 @@ define i32 @combine_urem_dupe(i32 %x) { ; CHECK-LABEL: combine_urem_dupe: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: divl %edi +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %eax ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %1 = urem i32 %x, %x Index: test/CodeGen/X86/conditional-indecrement.ll =================================================================== --- test/CodeGen/X86/conditional-indecrement.ll +++ test/CodeGen/X86/conditional-indecrement.ll @@ -4,9 +4,9 @@ define i32 @test1(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -17,9 +17,9 @@ define i32 @test1_commute(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test1_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -30,9 +30,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -43,9 +43,9 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -56,9 +56,9 @@ define i32 @test4(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -69,9 +69,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -82,9 +82,9 @@ define i32 @test6(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -95,9 +95,9 @@ define i32 @test7(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -108,9 +108,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 Index: test/CodeGen/X86/divide-by-constant.ll =================================================================== --- test/CodeGen/X86/divide-by-constant.ll +++ test/CodeGen/X86/divide-by-constant.ll @@ -94,8 +94,8 @@ ; X32: # %bb.0: ; X32-NEXT: movl $365384439, %eax # imm = 0x15C752F7 ; X32-NEXT: mull {{[0-9]+}}(%esp) -; X32-NEXT: shrl $27, %edx ; X32-NEXT: movl %edx, %eax +; X32-NEXT: shrl $27, %eax ; X32-NEXT: retl ; ; X64-LABEL: test5: @@ -216,9 +216,9 @@ ; ; X64-LABEL: testsize1: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $32 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -239,9 +239,9 @@ ; ; X64-LABEL: testsize2: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -259,8 +259,8 @@ ; ; X64-LABEL: testsize3: ; X64: # %bb.0: # %entry -; X64-NEXT: shrl $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $5, %eax ; X64-NEXT: retq entry: %div = udiv i32 %x, 32 @@ -279,10 +279,10 @@ ; ; X64-LABEL: testsize4: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: divl %ecx ; X64-NEXT: retq entry: @@ -298,31 +298,30 @@ ; X32-NEXT: pushl $12345 # imm = 0x3039 ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: pushl {{[0-9]+}}(%esp) -; X32-NEXT: calll __umoddi3 +; X32-NEXT: calll __umoddi3@PLT ; X32-NEXT: addl $16, %esp ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $7 ; X32-NEXT: pushl %edx ; X32-NEXT: pushl %eax -; X32-NEXT: calll __udivdi3 +; X32-NEXT: calll __udivdi3@PLT ; X32-NEXT: addl $28, %esp ; X32-NEXT: retl ; ; X64-LABEL: PR23590: ; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdx +; X64-NEXT: movabsq $6120523590596543007, %rcx # imm = 0x54F077C718E7C21F +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: shrq $12, %rdx ; X64-NEXT: imulq $12345, %rdx, %rax # imm = 0x3039 -; X64-NEXT: subq %rax, %rcx -; X64-NEXT: movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdx -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: shrq %rcx -; X64-NEXT: leaq (%rcx,%rdx), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movabsq $2635249153387078803, %rcx # imm = 0x2492492492492493 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: subq %rdx, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: leaq (%rdi,%rdx), %rax ; X64-NEXT: shrq $2, %rax ; X64-NEXT: retq entry: Index: test/CodeGen/X86/divrem.ll =================================================================== --- test/CodeGen/X86/divrem.ll +++ test/CodeGen/X86/divrem.ll @@ -15,7 +15,7 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: pushl {{[0-9]+}}(%esp) -; X32-NEXT: calll __divdi3 +; X32-NEXT: calll __divdi3@PLT ; X32-NEXT: addl $16, %esp ; X32-NEXT: movl %eax, %esi ; X32-NEXT: movl %edx, %edi @@ -23,7 +23,7 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: pushl {{[0-9]+}}(%esp) -; X32-NEXT: calll __moddi3 +; X32-NEXT: calll __moddi3@PLT ; X32-NEXT: addl $16, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %edi, 4(%ecx) @@ -101,6 +101,7 @@ ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: cwtd ; X64-NEXT: idivw %si ; X64-NEXT: movw %ax, (%r8) @@ -131,6 +132,7 @@ ; X64-LABEL: si8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %esi @@ -157,7 +159,7 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: pushl {{[0-9]+}}(%esp) -; X32-NEXT: calll __udivdi3 +; X32-NEXT: calll __udivdi3@PLT ; X32-NEXT: addl $16, %esp ; X32-NEXT: movl %eax, %esi ; X32-NEXT: movl %edx, %edi @@ -165,7 +167,7 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: pushl {{[0-9]+}}(%esp) -; X32-NEXT: calll __umoddi3 +; X32-NEXT: calll __umoddi3@PLT ; X32-NEXT: addl $16, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %edi, 4(%ecx) @@ -182,8 +184,8 @@ ; X64-LABEL: ui64: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divq %rsi ; X64-NEXT: movq %rax, (%r8) ; X64-NEXT: movq %rdx, (%rcx) @@ -212,8 +214,8 @@ ; X64-LABEL: ui32: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divl %esi ; X64-NEXT: movl %eax, (%r8) ; X64-NEXT: movl %edx, (%rcx) @@ -242,8 +244,9 @@ ; X64-LABEL: ui16: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: divw %si ; X64-NEXT: movw %ax, (%r8) ; X64-NEXT: movw %dx, (%rcx) Index: test/CodeGen/X86/divrem8_ext.ll =================================================================== --- test/CodeGen/X86/divrem8_ext.ll +++ test/CodeGen/X86/divrem8_ext.ll @@ -112,6 +112,7 @@ ; X64-LABEL: test_sdivrem_sext_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %ecx @@ -137,6 +138,7 @@ ; X64-LABEL: test_srem_sext_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax @@ -161,6 +163,7 @@ ; X64-LABEL: test_srem_noext_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax @@ -186,6 +189,7 @@ ; X64-LABEL: test_srem_sext64_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax Index: test/CodeGen/X86/fast-isel-fold-mem.ll =================================================================== --- test/CodeGen/X86/fast-isel-fold-mem.ll +++ test/CodeGen/X86/fast-isel-fold-mem.ll @@ -3,8 +3,8 @@ define i64 @fold_load(i64* %a, i64 %b) { ; CHECK-LABEL: fold_load -; CHECK: addq (%rdi), %rsi -; CHECK-NEXT: movq %rsi, %rax +; CHECK: movq %rsi, %rax +; CHECK-NEXT: addq (%rdi), %rax %1 = load i64, i64* %a, align 8 %2 = add i64 %1, %b ret i64 %2 Index: test/CodeGen/X86/fast-isel-select-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov.ll +++ test/CodeGen/X86/fast-isel-select-cmov.ll @@ -31,9 +31,9 @@ define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) { ; CHECK-LABEL: select_cmov_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmovel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmovel %edx, %eax ; CHECK-NEXT: retq %1 = select i1 %cond, i32 %a, i32 %b ret i32 %1 @@ -42,9 +42,9 @@ define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) { ; CHECK-LABEL: select_cmp_cmov_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: cmovbl %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: retq %1 = icmp ult i32 %a, %b %2 = select i1 %1, i32 %a, i32 %b @@ -54,9 +54,9 @@ define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) { ; CHECK-LABEL: select_cmov_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmoveq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmoveq %rdx, %rax ; CHECK-NEXT: retq %1 = select i1 %cond, i64 %a, i64 %b ret i64 %1 @@ -65,9 +65,9 @@ define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) { ; CHECK-LABEL: select_cmp_cmov_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rdi, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: cmovbq %rdi, %rax ; CHECK-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %a, i64 %b Index: test/CodeGen/X86/fast-isel-select-cmov2.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov2.ll +++ test/CodeGen/X86/fast-isel-select-cmov2.ll @@ -19,30 +19,30 @@ define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) { ; SDAG-LABEL: select_fcmp_oeq_cmov: ; SDAG: ## %bb.0: -; SDAG-NEXT: ucomisd %xmm1, %xmm0 -; SDAG-NEXT: cmovneq %rsi, %rdi -; SDAG-NEXT: cmovpq %rsi, %rdi ; SDAG-NEXT: movq %rdi, %rax +; SDAG-NEXT: ucomisd %xmm1, %xmm0 +; SDAG-NEXT: cmovneq %rsi, %rax +; SDAG-NEXT: cmovpq %rsi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: select_fcmp_oeq_cmov: ; FAST: ## %bb.0: -; FAST-NEXT: ucomisd %xmm1, %xmm0 -; FAST-NEXT: setnp %al -; FAST-NEXT: sete %cl -; FAST-NEXT: testb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: ucomisd %xmm1, %xmm0 +; FAST-NEXT: setnp %cl +; FAST-NEXT: sete %dl +; FAST-NEXT: testb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax ; FAST-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_oeq_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: setnp %al -; FAST_AVX-NEXT: sete %cl -; FAST_AVX-NEXT: testb %al, %cl -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: setnp %cl +; FAST_AVX-NEXT: sete %dl +; FAST_AVX-NEXT: testb %cl, %dl +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp oeq double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -52,16 +52,16 @@ define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ogt_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovbeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovbeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ogt_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovbeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovbeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ogt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -71,16 +71,16 @@ define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_oge_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovbq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovbq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_oge_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovbq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovbq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp oge double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -90,16 +90,16 @@ define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_olt_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovbeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovbeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_olt_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovbeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovbeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp olt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -109,16 +109,16 @@ define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ole_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovbq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovbq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ole_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovbq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovbq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ole double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -128,16 +128,16 @@ define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_one_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmoveq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmoveq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_one_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp one double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -147,16 +147,16 @@ define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ord_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovpq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovpq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ord_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovpq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovpq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ord double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -166,16 +166,16 @@ define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_uno_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovnpq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovnpq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_uno_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovnpq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovnpq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp uno double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -185,16 +185,16 @@ define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ueq_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovneq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovneq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ueq_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovneq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovneq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ueq double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -204,16 +204,16 @@ define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ugt_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovaeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovaeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ugt_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovaeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovaeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ugt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -223,16 +223,16 @@ define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_uge_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovaq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovaq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_uge_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovaq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovaq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp uge double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -242,16 +242,16 @@ define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ult_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovaeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovaeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ult_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovaeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovaeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ult double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -261,16 +261,16 @@ define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ule_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovaq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovaq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ule_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovaq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovaq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ule double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -280,30 +280,30 @@ define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) { ; SDAG-LABEL: select_fcmp_une_cmov: ; SDAG: ## %bb.0: -; SDAG-NEXT: ucomisd %xmm1, %xmm0 -; SDAG-NEXT: cmovneq %rdi, %rsi -; SDAG-NEXT: cmovpq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: ucomisd %xmm1, %xmm0 +; SDAG-NEXT: cmovneq %rdi, %rax +; SDAG-NEXT: cmovpq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: select_fcmp_une_cmov: ; FAST: ## %bb.0: -; FAST-NEXT: ucomisd %xmm1, %xmm0 -; FAST-NEXT: setp %al -; FAST-NEXT: setne %cl -; FAST-NEXT: orb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: ucomisd %xmm1, %xmm0 +; FAST-NEXT: setp %cl +; FAST-NEXT: setne %dl +; FAST-NEXT: orb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax ; FAST-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_une_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: setp %al -; FAST_AVX-NEXT: setne %cl -; FAST_AVX-NEXT: orb %al, %cl -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: setp %cl +; FAST_AVX-NEXT: setne %dl +; FAST_AVX-NEXT: orb %cl, %dl +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp une double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -323,9 +323,9 @@ define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_eq_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovneq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovneq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp eq i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -335,9 +335,9 @@ define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ne_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmoveq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ne i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -347,9 +347,9 @@ define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ugt_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ugt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -360,9 +360,9 @@ define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_uge_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp uge i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -372,9 +372,9 @@ define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ult_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -384,9 +384,9 @@ define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ule_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ule i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -396,9 +396,9 @@ define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sgt_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovleq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovleq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sgt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -408,9 +408,9 @@ define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sge_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovlq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sge i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -420,9 +420,9 @@ define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_slt_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp slt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -432,9 +432,9 @@ define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sle_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sle i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d Index: test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll +++ test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll @@ -280,12 +280,13 @@ define i8 @select_icmp_sle_i8(i64 %a, i64 %b, i8 %c, i8 %d) { ; CHECK-LABEL: select_icmp_sle_i8: ; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: cmpq %rsi, %rdi ; CHECK-NEXT: jle LBB12_2 ; CHECK-NEXT: ## %bb.1: -; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: LBB12_2: -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %1 = icmp sle i64 %a, %b %2 = select i1 %1, i8 %c, i8 %d Index: test/CodeGen/X86/fast-isel-sext-zext.ll =================================================================== --- test/CodeGen/X86/fast-isel-sext-zext.ll +++ test/CodeGen/X86/fast-isel-sext-zext.ll @@ -9,15 +9,14 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: negb %al ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test1: ; X64: ## %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: ## kill: def $al killed $al killed $eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i8 %x to i1 %u = sext i1 %z to i8 ret i8 %u @@ -32,7 +31,6 @@ ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: ## kill: def $ax killed $ax killed $eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test2: ; X64: ## %bb.0: @@ -41,7 +39,6 @@ ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i16 %x to i1 %u = sext i1 %z to i16 ret i16 %u @@ -55,7 +52,6 @@ ; X32-NEXT: negb %al ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test3: ; X64: ## %bb.0: @@ -63,7 +59,6 @@ ; X64-NEXT: negb %dil ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = sext i1 %z to i32 ret i32 %u @@ -77,7 +72,6 @@ ; X32-NEXT: negb %al ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test4: ; X64: ## %bb.0: @@ -85,7 +79,6 @@ ; X64-NEXT: negb %dil ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = sext i1 %z to i32 ret i32 %u @@ -97,14 +90,13 @@ ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test5: ; X64: ## %bb.0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: ## kill: def $al killed $al killed $eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i8 %x to i1 %u = zext i1 %z to i8 ret i8 %u @@ -118,7 +110,6 @@ ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: ## kill: def $ax killed $ax killed $eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test6: ; X64: ## %bb.0: @@ -126,7 +117,6 @@ ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i16 %x to i1 %u = zext i1 %z to i16 ret i16 %u @@ -139,14 +129,12 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test7: ; X64: ## %bb.0: ; X64-NEXT: andb $1, %dil ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = zext i1 %z to i32 ret i32 %u @@ -159,14 +147,12 @@ ; X32-NEXT: andb $1, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test8: ; X64: ## %bb.0: ; X64-NEXT: andb $1, %dil ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %z = trunc i32 %x to i1 %u = zext i1 %z to i32 ret i32 %u @@ -178,14 +164,12 @@ ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: ## kill: def $ax killed $ax killed $eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test9: ; X64: ## %bb.0: ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i16 ret i16 %u } @@ -195,13 +179,11 @@ ; X32: ## %bb.0: ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test10: ; X64: ## %bb.0: ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i32 ret i32 %u } @@ -213,13 +195,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test11: ; X64: ## %bb.0: ; X64-NEXT: movsbq %dil, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i8 %x to i64 ret i64 %u } @@ -230,14 +210,12 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: ## kill: def $ax killed $ax killed $eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test12: ; X64: ## %bb.0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i16 ret i16 %u } @@ -247,13 +225,11 @@ ; X32: ## %bb.0: ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test13: ; X64: ## %bb.0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i32 ret i32 %u } @@ -264,13 +240,11 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test14: ; X64: ## %bb.0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i8 %x to i64 ret i64 %u } @@ -280,13 +254,11 @@ ; X32: ## %bb.0: ; X32-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test15: ; X64: ## %bb.0: ; X64-NEXT: movswl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i16 %x to i32 ret i32 %u } @@ -298,13 +270,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test16: ; X64: ## %bb.0: ; X64-NEXT: movswq %di, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i16 %x to i64 ret i64 %u } @@ -314,13 +284,11 @@ ; X32: ## %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test17: ; X64: ## %bb.0: ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i16 %x to i32 ret i32 %u } @@ -331,13 +299,11 @@ ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test18: ; X64: ## %bb.0: ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i16 %x to i64 ret i64 %u } @@ -349,13 +315,11 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test19: ; X64: ## %bb.0: ; X64-NEXT: movslq %edi, %rax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = sext i32 %x to i64 ret i64 %u } @@ -366,13 +330,11 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: test20: ; X64: ## %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: retq -; X64-NEXT: ## -- End function %u = zext i32 %x to i64 ret i64 %u } Index: test/CodeGen/X86/fast-isel-shift.ll =================================================================== --- test/CodeGen/X86/fast-isel-shift.ll +++ test/CodeGen/X86/fast-isel-shift.ll @@ -5,8 +5,10 @@ ; CHECK-LABEL: shl_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shlb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shlb %cl, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = shl i8 %a, %b ret i8 %c @@ -16,9 +18,11 @@ ; CHECK-LABEL: shl_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cx -; CHECK-NEXT: shlw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: ## kill: def $cl killed $cx +; CHECK-NEXT: shlw %cl, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = shl i16 %a, %b ret i16 %c @@ -28,9 +32,9 @@ ; CHECK-LABEL: shl_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $ecx -; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $ecx +; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq %c = shl i32 %a, %b ret i32 %c @@ -40,9 +44,9 @@ ; CHECK-LABEL: shl_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: def $cl killed $rcx -; CHECK-NEXT: shlq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: def $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rax ; CHECK-NEXT: retq %c = shl i64 %a, %b ret i64 %c @@ -52,8 +56,10 @@ ; CHECK-LABEL: lshr_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shrb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shrb %cl, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = lshr i8 %a, %b ret i8 %c @@ -63,9 +69,11 @@ ; CHECK-LABEL: lshr_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cx -; CHECK-NEXT: shrw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: ## kill: def $cl killed $cx +; CHECK-NEXT: shrw %cl, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = lshr i16 %a, %b ret i16 %c @@ -75,9 +83,9 @@ ; CHECK-LABEL: lshr_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $ecx -; CHECK-NEXT: shrl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $ecx +; CHECK-NEXT: shrl %cl, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, %b ret i32 %c @@ -87,9 +95,9 @@ ; CHECK-LABEL: lshr_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: def $cl killed $rcx -; CHECK-NEXT: shrq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: def $cl killed $rcx +; CHECK-NEXT: shrq %cl, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, %b ret i64 %c @@ -99,8 +107,10 @@ ; CHECK-LABEL: ashr_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: sarb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: sarb %cl, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = ashr i8 %a, %b ret i8 %c @@ -110,9 +120,11 @@ ; CHECK-LABEL: ashr_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cx -; CHECK-NEXT: sarw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: ## kill: def $cl killed $cx +; CHECK-NEXT: sarw %cl, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = ashr i16 %a, %b ret i16 %c @@ -122,9 +134,9 @@ ; CHECK-LABEL: ashr_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $ecx -; CHECK-NEXT: sarl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $ecx +; CHECK-NEXT: sarl %cl, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, %b ret i32 %c @@ -134,9 +146,9 @@ ; CHECK-LABEL: ashr_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: def $cl killed $rcx -; CHECK-NEXT: sarq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: def $cl killed $rcx +; CHECK-NEXT: sarq %cl, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, %b ret i64 %c @@ -145,8 +157,9 @@ define i8 @shl_imm1_i8(i8 %a) { ; CHECK-LABEL: shl_imm1_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = shl i8 %a, 1 ret i8 %c @@ -185,8 +198,9 @@ define i8 @lshr_imm1_i8(i8 %a) { ; CHECK-LABEL: lshr_imm1_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = lshr i8 %a, 1 ret i8 %c @@ -195,8 +209,9 @@ define i16 @lshr_imm1_i16(i16 %a) { ; CHECK-LABEL: lshr_imm1_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrw $1, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrw $1, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = lshr i16 %a, 1 ret i16 %c @@ -205,8 +220,8 @@ define i32 @lshr_imm1_i32(i32 %a) { ; CHECK-LABEL: lshr_imm1_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $1, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, 1 ret i32 %c @@ -215,8 +230,8 @@ define i64 @lshr_imm1_i64(i64 %a) { ; CHECK-LABEL: lshr_imm1_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrq $1, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $1, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, 1 ret i64 %c @@ -225,8 +240,9 @@ define i8 @ashr_imm1_i8(i8 %a) { ; CHECK-LABEL: ashr_imm1_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = ashr i8 %a, 1 ret i8 %c @@ -235,8 +251,9 @@ define i16 @ashr_imm1_i16(i16 %a) { ; CHECK-LABEL: ashr_imm1_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarw $1, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarw $1, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = ashr i16 %a, 1 ret i16 %c @@ -245,8 +262,8 @@ define i32 @ashr_imm1_i32(i32 %a) { ; CHECK-LABEL: ashr_imm1_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $1, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, 1 ret i32 %c @@ -255,8 +272,8 @@ define i64 @ashr_imm1_i64(i64 %a) { ; CHECK-LABEL: ashr_imm1_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarq $1, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: sarq $1, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, 1 ret i64 %c @@ -265,8 +282,9 @@ define i8 @shl_imm4_i8(i8 %a) { ; CHECK-LABEL: shl_imm4_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = shl i8 %a, 4 ret i8 %c @@ -275,8 +293,9 @@ define i16 @shl_imm4_i16(i16 %a) { ; CHECK-LABEL: shl_imm4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlw $4, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = shl i16 %a, 4 ret i16 %c @@ -285,8 +304,8 @@ define i32 @shl_imm4_i32(i32 %a) { ; CHECK-LABEL: shl_imm4_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: shll $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $4, %eax ; CHECK-NEXT: retq %c = shl i32 %a, 4 ret i32 %c @@ -295,8 +314,8 @@ define i64 @shl_imm4_i64(i64 %a) { ; CHECK-LABEL: shl_imm4_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq $4, %rax ; CHECK-NEXT: retq %c = shl i64 %a, 4 ret i64 %c @@ -305,8 +324,9 @@ define i8 @lshr_imm4_i8(i8 %a) { ; CHECK-LABEL: lshr_imm4_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrb $4, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = lshr i8 %a, 4 ret i8 %c @@ -315,8 +335,9 @@ define i16 @lshr_imm4_i16(i16 %a) { ; CHECK-LABEL: lshr_imm4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrw $4, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = lshr i16 %a, 4 ret i16 %c @@ -325,8 +346,8 @@ define i32 @lshr_imm4_i32(i32 %a) { ; CHECK-LABEL: lshr_imm4_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, 4 ret i32 %c @@ -335,8 +356,8 @@ define i64 @lshr_imm4_i64(i64 %a) { ; CHECK-LABEL: lshr_imm4_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $4, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, 4 ret i64 %c @@ -345,8 +366,9 @@ define i8 @ashr_imm4_i8(i8 %a) { ; CHECK-LABEL: ashr_imm4_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarb $4, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = ashr i8 %a, 4 ret i8 %c @@ -355,8 +377,9 @@ define i16 @ashr_imm4_i16(i16 %a) { ; CHECK-LABEL: ashr_imm4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarw $4, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = ashr i16 %a, 4 ret i16 %c @@ -365,8 +388,8 @@ define i32 @ashr_imm4_i32(i32 %a) { ; CHECK-LABEL: ashr_imm4_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarl $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $4, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, 4 ret i32 %c @@ -375,8 +398,8 @@ define i64 @ashr_imm4_i64(i64 %a) { ; CHECK-LABEL: ashr_imm4_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: sarq $4, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, 4 ret i64 %c Index: test/CodeGen/X86/fast-isel-store.ll =================================================================== --- test/CodeGen/X86/fast-isel-store.ll +++ test/CodeGen/X86/fast-isel-store.ll @@ -11,8 +11,8 @@ define i32 @test_store_32(i32* nocapture %addr, i32 %value) { ; ALL32-LABEL: test_store_32: ; ALL32: # %bb.0: # %entry -; ALL32-NEXT: movl %esi, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movl %eax, (%rdi) ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_32: @@ -29,8 +29,9 @@ define i16 @test_store_16(i16* nocapture %addr, i16 %value) { ; ALL32-LABEL: test_store_16: ; ALL32: # %bb.0: # %entry -; ALL32-NEXT: movw %si, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movw %ax, (%rdi) +; ALL32-NEXT: # kill: def $ax killed $ax killed $eax ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_16: @@ -58,11 +59,11 @@ ; SSE64-NEXT: movdqu %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32: -; AVXONLY32: # %bb.0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqu %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32: +; AVX32: # %bb.0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: vmovdqu %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32: ; AVX64: # %bb.0: @@ -70,18 +71,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqu %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32: -; KNL32: # %bb.0: -; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL32-NEXT: vmovdqu %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32: -; SKX32: # %bb.0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqu %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 1 ret <4 x i32> %foo @@ -101,11 +90,11 @@ ; SSE64-NEXT: movdqa %xmm0, (%eax) ; SSE64-NEXT: retl ; -; AVXONLY32-LABEL: test_store_4xi32_aligned: -; AVXONLY32: # %bb.0: -; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVXONLY32-NEXT: vmovdqa %xmm0, (%rdi) -; AVXONLY32-NEXT: retq +; AVX32-LABEL: test_store_4xi32_aligned: +; AVX32: # %bb.0: +; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX32-NEXT: vmovdqa %xmm0, (%rdi) +; AVX32-NEXT: retq ; ; AVX64-LABEL: test_store_4xi32_aligned: ; AVX64: # %bb.0: @@ -113,18 +102,6 @@ ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX64-NEXT: vmovdqa %xmm0, (%eax) ; AVX64-NEXT: retl -; -; KNL32-LABEL: test_store_4xi32_aligned: -; KNL32: # %bb.0: -; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; KNL32-NEXT: vmovdqa %xmm0, (%rdi) -; KNL32-NEXT: retq -; -; SKX32-LABEL: test_store_4xi32_aligned: -; SKX32: # %bb.0: -; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqa %xmm0, (%rdi) -; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 16 ret <4 x i32> %foo Index: test/CodeGen/X86/fixup-bw-copy.ll =================================================================== --- test/CodeGen/X86/fixup-bw-copy.ll +++ test/CodeGen/X86/fixup-bw-copy.ll @@ -7,15 +7,11 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" define i8 @test_movb(i8 %a0) { -; BWON64-LABEL: test_movb: -; BWON64: # %bb.0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movb: -; BWOFF64: # %bb.0: -; BWOFF64-NEXT: movb %dil, %al -; BWOFF64-NEXT: retq +; X64-LABEL: test_movb: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq ; ; X32-LABEL: test_movb: ; X32: # %bb.0: @@ -25,15 +21,11 @@ } define i16 @test_movw(i16 %a0) { -; BWON64-LABEL: test_movw: -; BWON64: # %bb.0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movw: -; BWOFF64: # %bb.0: -; BWOFF64-NEXT: movw %di, %ax -; BWOFF64-NEXT: retq +; X64-LABEL: test_movw: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq ; ; BWON32-LABEL: test_movw: ; BWON32: # %bb.0: Index: test/CodeGen/X86/fold-vector-sext-crash2.ll =================================================================== --- test/CodeGen/X86/fold-vector-sext-crash2.ll +++ test/CodeGen/X86/fold-vector-sext-crash2.ll @@ -28,14 +28,14 @@ ; ; X64-LABEL: test_sext1: ; X64: # %bb.0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 56(%rdi) -; X64-NEXT: movq $-1, 48(%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-99, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 56(%rax) +; X64-NEXT: movq $-1, 48(%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-99, 32(%rax) ; X64-NEXT: retq %Se = sext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -66,14 +66,14 @@ ; ; X64-LABEL: test_sext2: ; X64: # %bb.0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 56(%rdi) -; X64-NEXT: movq $-1, 48(%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-1999, 32(%rdi) # imm = 0xF831 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 56(%rax) +; X64-NEXT: movq $-1, 48(%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-1999, 32(%rax) # imm = 0xF831 ; X64-NEXT: retq %Se = sext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -104,13 +104,13 @@ ; ; X64-LABEL: test_zext1: ; X64: # %bb.0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $0, 40(%rdi) -; X64-NEXT: movq $254, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $0, 40(%rax) +; X64-NEXT: movq $254, 32(%rax) ; X64-NEXT: retq %Se = zext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -141,13 +141,13 @@ ; ; X64-LABEL: test_zext2: ; X64: # %bb.0: -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq $-1, 40(%rdi) -; X64-NEXT: movq $-2, 32(%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, 48(%rax) +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: movq $-1, 40(%rax) +; X64-NEXT: movq $-2, 32(%rax) ; X64-NEXT: retq %Se = zext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> Index: test/CodeGen/X86/ghc-cc64.ll =================================================================== --- test/CodeGen/X86/ghc-cc64.ll +++ test/CodeGen/X86/ghc-cc64.ll @@ -22,8 +22,8 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rdi, %r13 - ; CHECK-NEXT: movq %rsi, %rbp + ; CHECK: movq %rsi, %rbp + ; CHECK-NEXT: movq %rdi, %r13 ; CHECK-NEXT: callq addtwo %0 = call ghccc i64 @addtwo(i64 %a, i64 %b) ; CHECK: callq foo Index: test/CodeGen/X86/hipe-cc64.ll =================================================================== --- test/CodeGen/X86/hipe-cc64.ll +++ test/CodeGen/X86/hipe-cc64.ll @@ -4,11 +4,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rsi, %rax + ; CHECK: movq %rsi, %rdx ; CHECK-NEXT: movl $8, %ecx ; CHECK-NEXT: movl $9, %r8d ; CHECK-NEXT: movq %rdi, %rsi - ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 Index: test/CodeGen/X86/i128-mul.ll =================================================================== --- test/CodeGen/X86/i128-mul.ll +++ test/CodeGen/X86/i128-mul.ll @@ -336,17 +336,17 @@ ; X64-BMI-NEXT: je .LBB1_3 ; X64-BMI-NEXT: # %bb.1: # %for.body.preheader ; X64-BMI-NEXT: xorl %r10d, %r10d -; X64-BMI-NEXT: xorl %eax, %eax +; X64-BMI-NEXT: xorl %ecx, %ecx ; X64-BMI-NEXT: .p2align 4, 0x90 ; X64-BMI-NEXT: .LBB1_2: # %for.body ; X64-BMI-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-BMI-NEXT: movq %r8, %rdx -; X64-BMI-NEXT: mulxq (%r9,%rax,8), %rcx, %rdx -; X64-BMI-NEXT: addq %r10, %rcx +; X64-BMI-NEXT: mulxq (%r9,%rcx,8), %rax, %rdx +; X64-BMI-NEXT: addq %r10, %rax ; X64-BMI-NEXT: adcq $0, %rdx -; X64-BMI-NEXT: movq %rcx, (%rsi,%rax,8) -; X64-BMI-NEXT: incq %rax -; X64-BMI-NEXT: cmpq %rax, %rdi +; X64-BMI-NEXT: movq %rax, (%rsi,%rcx,8) +; X64-BMI-NEXT: incq %rcx +; X64-BMI-NEXT: cmpq %rcx, %rdi ; X64-BMI-NEXT: movq %rdx, %r10 ; X64-BMI-NEXT: jne .LBB1_2 ; X64-BMI-NEXT: .LBB1_3: # %for.end Index: test/CodeGen/X86/iabs.ll =================================================================== --- test/CodeGen/X86/iabs.ll +++ test/CodeGen/X86/iabs.ll @@ -22,10 +22,11 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: sarb $7, %al -; X64-NEXT: addb %al, %dil -; X64-NEXT: xorb %al, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sarb $7, %cl +; X64-NEXT: addb %cl, %al +; X64-NEXT: xorb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp1neg = sub i8 0, %a %b = icmp sgt i8 %a, -1 Index: test/CodeGen/X86/imul.ll =================================================================== --- test/CodeGen/X86/imul.ll +++ test/CodeGen/X86/imul.ll @@ -158,16 +158,16 @@ define i32 @mul4294967295_32(i32 %A) { ; X64-LABEL: mul4294967295_32: -; X64: negl %edi -; X64-NEXT: movl %edi, %eax +; X64: movl %edi, %eax +; X64-NEXT: negl %eax %mul = mul i32 %A, 4294967295 ret i32 %mul } define i64 @mul18446744073709551615_64(i64 %A) { ; X64-LABEL: mul18446744073709551615_64: -; X64: negq %rdi -; X64-NEXT: movq %rdi, %rax +; X64: movq %rdi, %rax +; X64-NEXT: negq %rax %mul = mul i64 %A, 18446744073709551615 ret i64 %mul } Index: test/CodeGen/X86/ipra-local-linkage.ll =================================================================== --- test/CodeGen/X86/ipra-local-linkage.ll +++ test/CodeGen/X86/ipra-local-linkage.ll @@ -24,7 +24,7 @@ call void @foo() ; CHECK-LABEL: bar: ; CHECK: callq foo - ; CHECK-NEXT: movl %eax, %r15d + ; CHECK-NEXT: movl %edi, %r15d call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X) ret void } Index: test/CodeGen/X86/legalize-shift-64.ll =================================================================== --- test/CodeGen/X86/legalize-shift-64.ll +++ test/CodeGen/X86/legalize-shift-64.ll @@ -88,6 +88,8 @@ ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -101,12 +103,11 @@ ; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl %edx, %ebx -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %ch, %cl ; CHECK-NEXT: shll %cl, %ebx ; CHECK-NEXT: shldl %cl, %edx, %ebp -; CHECK-NEXT: testb $32, %cl +; CHECK-NEXT: testb $32, %ch ; CHECK-NEXT: je .LBB4_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: movl %ebx, %ebp Index: test/CodeGen/X86/legalize-shl-vec.ll =================================================================== --- test/CodeGen/X86/legalize-shl-vec.ll +++ test/CodeGen/X86/legalize-shl-vec.ll @@ -42,21 +42,21 @@ ; ; X64-LABEL: test_shl: ; X64: # %bb.0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; X64-NEXT: shldq $2, %rax, %rcx -; X64-NEXT: shldq $2, %rdx, %rax -; X64-NEXT: shldq $2, %r9, %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: shldq $2, %rcx, %rdx +; X64-NEXT: shldq $2, %rsi, %rcx +; X64-NEXT: shldq $2, %r9, %rsi ; X64-NEXT: shlq $2, %r9 -; X64-NEXT: movq %rcx, 56(%rdi) -; X64-NEXT: movq %rax, 48(%rdi) -; X64-NEXT: movq %rdx, 40(%rdi) -; X64-NEXT: movq %r9, 32(%rdi) +; X64-NEXT: movq %rdx, 56(%rax) +; X64-NEXT: movq %rcx, 48(%rax) +; X64-NEXT: movq %rsi, 40(%rax) +; X64-NEXT: movq %r9, 32(%rax) ; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> , i256 -1, i32 0 %Out = shl <2 x i256> %In, %Amt @@ -127,21 +127,21 @@ ; ; X64-LABEL: test_srl: ; X64: # %bb.0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; X64-NEXT: shrdq $4, %rdx, %r9 -; X64-NEXT: shrdq $4, %rax, %rdx -; X64-NEXT: shrdq $4, %rcx, %rax -; X64-NEXT: shrq $4, %rcx -; X64-NEXT: movq %rcx, 56(%rdi) -; X64-NEXT: movq %rax, 48(%rdi) -; X64-NEXT: movq %rdx, 40(%rdi) -; X64-NEXT: movq %r9, 32(%rdi) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: shrdq $4, %rsi, %r9 +; X64-NEXT: shrdq $4, %rcx, %rsi +; X64-NEXT: shrdq $4, %rdx, %rcx +; X64-NEXT: shrq $4, %rdx +; X64-NEXT: movq %rdx, 56(%rax) +; X64-NEXT: movq %rcx, 48(%rax) +; X64-NEXT: movq %rsi, 40(%rax) +; X64-NEXT: movq %r9, 32(%rax) ; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movaps %xmm0, 16(%rax) +; X64-NEXT: movaps %xmm0, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> , i256 -1, i32 0 %Out = lshr <2 x i256> %In, %Amt @@ -214,23 +214,23 @@ ; ; X64-LABEL: test_sra: ; X64: # %bb.0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; X64-NEXT: shrdq $6, %rdx, %r9 -; X64-NEXT: shrdq $6, %rax, %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: shrdq $6, %rsi, %r9 +; X64-NEXT: shrdq $6, %rcx, %rsi ; X64-NEXT: sarq $63, %r8 -; X64-NEXT: shrdq $6, %rcx, %rax -; X64-NEXT: sarq $6, %rcx -; X64-NEXT: movq %rcx, 56(%rdi) -; X64-NEXT: movq %rax, 48(%rdi) -; X64-NEXT: movq %rdx, 40(%rdi) -; X64-NEXT: movq %r9, 32(%rdi) -; X64-NEXT: movq %r8, 24(%rdi) -; X64-NEXT: movq %r8, 16(%rdi) -; X64-NEXT: movq %r8, 8(%rdi) -; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrdq $6, %rdx, %rcx +; X64-NEXT: sarq $6, %rdx +; X64-NEXT: movq %rdx, 56(%rax) +; X64-NEXT: movq %rcx, 48(%rax) +; X64-NEXT: movq %rsi, 40(%rax) +; X64-NEXT: movq %r9, 32(%rax) +; X64-NEXT: movq %r8, 24(%rax) +; X64-NEXT: movq %r8, 16(%rax) +; X64-NEXT: movq %r8, 8(%rax) +; X64-NEXT: movq %r8, (%rax) ; X64-NEXT: retq %Amt = insertelement <2 x i256> , i256 -1, i32 0 %Out = ashr <2 x i256> %In, %Amt Index: test/CodeGen/X86/machine-combiner-int.ll =================================================================== --- test/CodeGen/X86/machine-combiner-int.ll +++ test/CodeGen/X86/machine-combiner-int.ll @@ -62,10 +62,11 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ands_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: andb %cl, %dl -; CHECK-NEXT: andb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: andb %dil, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = and i8 %x2, %t0 @@ -78,10 +79,10 @@ define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ands_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: andl %ecx, %edx -; CHECK-NEXT: andl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = and i32 %x2, %t0 @@ -92,10 +93,10 @@ define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ands_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: andq %rcx, %rdx -; CHECK-NEXT: andq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = and i64 %x2, %t0 @@ -109,10 +110,11 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: orb %cl, %dl -; CHECK-NEXT: orb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = or i8 %x2, %t0 @@ -125,10 +127,10 @@ define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: orl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = or i32 %x2, %t0 @@ -139,10 +141,10 @@ define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = or i64 %x2, %t0 @@ -156,10 +158,11 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_xors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: xorb %cl, %dl -; CHECK-NEXT: xorb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: xorb %cl, %al +; CHECK-NEXT: xorb %dil, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = xor i8 %x2, %t0 @@ -172,10 +175,10 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_xors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: xorl %ecx, %edx -; CHECK-NEXT: xorl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = xor i32 %x2, %t0 @@ -186,10 +189,10 @@ define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_xors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: xorq %rcx, %rdx -; CHECK-NEXT: xorq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: xorq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = xor i64 %x2, %t0 Index: test/CodeGen/X86/machine-cse.ll =================================================================== --- test/CodeGen/X86/machine-cse.ll +++ test/CodeGen/X86/machine-cse.ll @@ -133,24 +133,24 @@ define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp { ; CHECK-LABEL: bsd_memchr: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: testq %rcx, %rcx ; CHECK-NEXT: je .LBB3_4 ; CHECK-NEXT: # %bb.1: # %preheader -; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movzbl %dl, %edx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB3_2: # %do.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl %eax, %esi +; CHECK-NEXT: cmpl %edx, %esi ; CHECK-NEXT: je .LBB3_5 ; CHECK-NEXT: # %bb.3: # %do.cond ; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: incq %rdi +; CHECK-NEXT: incq %rax ; CHECK-NEXT: decq %rcx ; CHECK-NEXT: jne .LBB3_2 ; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: .LBB3_5: # %return -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %cmp = icmp eq i64 %n, 0 Index: test/CodeGen/X86/madd.ll =================================================================== --- test/CodeGen/X86/madd.ll +++ test/CodeGen/X86/madd.ll @@ -1459,6 +1459,7 @@ define <32 x i32> @jumbled_indices32(<64 x i16> %A, <64 x i16> %B) { ; SSE2-LABEL: jumbled_indices32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm2 @@ -1467,15 +1468,14 @@ ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm5 ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm7 -; SSE2-NEXT: movdqa %xmm7, 112(%rdi) -; SSE2-NEXT: movdqa %xmm6, 96(%rdi) -; SSE2-NEXT: movdqa %xmm5, 80(%rdi) -; SSE2-NEXT: movdqa %xmm4, 64(%rdi) -; SSE2-NEXT: movdqa %xmm3, 48(%rdi) -; SSE2-NEXT: movdqa %xmm2, 32(%rdi) -; SSE2-NEXT: movdqa %xmm1, 16(%rdi) -; SSE2-NEXT: movdqa %xmm0, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movdqa %xmm7, 112(%rax) +; SSE2-NEXT: movdqa %xmm6, 96(%rax) +; SSE2-NEXT: movdqa %xmm5, 80(%rax) +; SSE2-NEXT: movdqa %xmm4, 64(%rax) +; SSE2-NEXT: movdqa %xmm3, 48(%rax) +; SSE2-NEXT: movdqa %xmm2, 32(%rax) +; SSE2-NEXT: movdqa %xmm1, 16(%rax) +; SSE2-NEXT: movdqa %xmm0, (%rax) ; SSE2-NEXT: retq ; ; AVX1-LABEL: jumbled_indices32: Index: test/CodeGen/X86/mask-negated-bool.ll =================================================================== --- test/CodeGen/X86/mask-negated-bool.ll +++ test/CodeGen/X86/mask-negated-bool.ll @@ -4,8 +4,8 @@ define i32 @mask_negated_zext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_zext_bool1: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = zext i1 %x to i32 %neg = sub i32 0, %ext @@ -38,8 +38,8 @@ define i32 @mask_negated_sext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_sext_bool1: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = sext i1 %x to i32 %neg = sub i32 0, %ext Index: test/CodeGen/X86/misched-matmul.ll =================================================================== --- test/CodeGen/X86/misched-matmul.ll +++ test/CodeGen/X86/misched-matmul.ll @@ -10,7 +10,7 @@ ; more complex cases. ; ; CHECK: @wrap_mul4 -; CHECK: 23 regalloc - Number of spills inserted +; CHECK: 25 regalloc - Number of spills inserted define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 { entry: Index: test/CodeGen/X86/mul-constant-i16.ll =================================================================== --- test/CodeGen/X86/mul-constant-i16.ll +++ test/CodeGen/X86/mul-constant-i16.ll @@ -11,6 +11,7 @@ ; X64-LABEL: test_mul_by_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 1 ret i16 %mul @@ -296,8 +297,9 @@ ; ; X64-LABEL: test_mul_by_16: ; X64: # %bb.0: -; X64-NEXT: shll $4, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $4, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 16 ret i16 %mul @@ -633,8 +635,9 @@ ; ; X64-LABEL: test_mul_by_32: ; X64: # %bb.0: -; X64-NEXT: shll $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 32 ret i16 %mul Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -780,14 +780,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -798,26 +798,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $4, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 16 ret i32 %mul @@ -1626,14 +1626,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1644,26 +1644,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $5, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 32 ret i32 %mul Index: test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- test/CodeGen/X86/mul-constant-i64.ll +++ test/CodeGen/X86/mul-constant-i64.ll @@ -802,14 +802,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -822,26 +822,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $4, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 16 ret i64 %mul @@ -1707,14 +1707,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1727,26 +1727,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $5, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 32 ret i64 %mul Index: test/CodeGen/X86/mul-i1024.ll =================================================================== --- test/CodeGen/X86/mul-i1024.ll +++ test/CodeGen/X86/mul-i1024.ll @@ -13,7 +13,7 @@ ; X32-NEXT: subl $996, %esp # imm = 0x3E4 ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 32(%eax), %eax -; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -192(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebx @@ -23,8 +23,8 @@ ; X32-NEXT: movl %eax, -440(%ebp) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, -884(%ebp) # 4-byte Spill @@ -32,42 +32,42 @@ ; X32-NEXT: movl %eax, -416(%ebp) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -400(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -320(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -212(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -892(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 36(%eax), %eax -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edi ; X32-NEXT: movl %edi, -304(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: movl 36(%esi), %eax -; X32-NEXT: movl %eax, -316(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -176(%ebp) # 4-byte Spill ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -656(%ebp) # 4-byte Spill ; X32-NEXT: leal (%ebx,%edi), %eax @@ -81,13 +81,13 @@ ; X32-NEXT: movl %eax, -640(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -112(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %edi, -108(%ebp) # 4-byte Spill +; X32-NEXT: adcl %esi, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl %esi, %ebx -; X32-NEXT: setb -160(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -156(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl (%eax), %eax -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi @@ -97,37 +97,37 @@ ; X32-NEXT: movl %eax, -348(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -320(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -316(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %eax ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, -428(%ebp) # 4-byte Spill ; X32-NEXT: movl (%ecx), %eax -; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -260(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -764(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %esi, %ecx ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: movl %ebx, -424(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %ebx -; X32-NEXT: movl %ebx, -256(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -220(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %ebx, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl -176(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -224(%ebp) # 4-byte Folded Spill ; X32-NEXT: setb -388(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 4(%eax), %eax @@ -144,44 +144,44 @@ ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bh ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill ; X32-NEXT: movzbl %bh, %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 8(%eax), %eax -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -244(%ebp) # 4-byte Spill ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -112(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 52(%eax), %eax ; X32-NEXT: movl %eax, -340(%ebp) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edi, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -196(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movl %ecx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %esi ; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 56(%eax), %eax ; X32-NEXT: movl %eax, -408(%ebp) # 4-byte Spill @@ -193,26 +193,26 @@ ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %edi ; X32-NEXT: addl %esi, %ebx -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill -; X32-NEXT: adcl -216(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -268(%ebp) # 4-byte Spill +; X32-NEXT: adcl -220(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill -; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -184(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movzbl -160(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl -124(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -188(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movzbl -156(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 40(%eax), %eax ; X32-NEXT: movl %eax, -352(%ebp) # 4-byte Spill @@ -221,47 +221,47 @@ ; X32-NEXT: movl %eax, -364(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %ebx, -396(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %eax, %edi ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: addl %esi, %edi -; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl -124(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: seto %al ; X32-NEXT: lahf ; X32-NEXT: movl %eax, %eax ; X32-NEXT: movl %eax, -456(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %ecx ; X32-NEXT: movl 16(%ecx), %eax -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -312(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill ; X32-NEXT: movl 20(%ecx), %eax -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movl %ebx, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -160(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -272,70 +272,70 @@ ; X32-NEXT: movl %eax, -284(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -308(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %ebx ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -32(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl %edi, -116(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -768(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -776(%ebp) # 4-byte Spill -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -772(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -52(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -780(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -332(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -268(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -32(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 20(%eax), %eax -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -347,37 +347,37 @@ ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx ; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -312(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -308(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %edx ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -308(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: movl %ebx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -24(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -464(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 4(%eax), %eax -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %esi, -276(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %edi @@ -389,20 +389,20 @@ ; X32-NEXT: movl %eax, -432(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 8(%eax), %eax -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %esi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl -432(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %esi, %edx ; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload @@ -418,47 +418,47 @@ ; X32-NEXT: popl %eax ; X32-NEXT: movl %edx, -736(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edx -; X32-NEXT: adcl -120(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -168(%ebp) # 4-byte Spill +; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %edx ; X32-NEXT: movl %edx, %eax ; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -620(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -788(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -784(%ebp) # 4-byte Spill -; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -100(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -212(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -804(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -820(%ebp) # 4-byte Spill -; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -116(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %eax @@ -466,52 +466,52 @@ ; X32-NEXT: movl %esi, -576(%ebp) # 4-byte Spill ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -540(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -800(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -796(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -792(%ebp) # 4-byte Spill -; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -224(%ebp) # 4-byte Spill ; X32-NEXT: movzbl -388(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -376(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 40(%eax), %eax -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %edi ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -212(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl -376(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -816(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %edi, -372(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -812(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %edx, -292(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -808(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -512(%ebp) # 4-byte Spill ; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload @@ -521,10 +521,10 @@ ; X32-NEXT: lahf ; X32-NEXT: movl %eax, %eax ; X32-NEXT: movl %eax, -740(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -624(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -628(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %esi @@ -536,14 +536,14 @@ ; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl 52(%esi), %eax -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %edi, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %ebx, %esi -; X32-NEXT: movl %esi, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -551,37 +551,37 @@ ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 56(%eax), %eax -; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -360(%ebp) # 4-byte Spill ; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill ; X32-NEXT: adcl -360(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movl %ebx, -472(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: movl %edi, -436(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -336(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -824(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, -588(%ebp) # 4-byte Spill ; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -632(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -828(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -636(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax @@ -596,14 +596,14 @@ ; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl %edx, -480(%ebp) # 4-byte Spill ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -920(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: adcl -384(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -932(%ebp) # 4-byte Spill @@ -629,10 +629,10 @@ ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, -528(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -524(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %edx, %eax -; X32-NEXT: movl -264(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -976(%ebp) # 4-byte Spill ; X32-NEXT: movl 64(%ecx), %eax @@ -647,44 +647,44 @@ ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: movl %ecx, -992(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -1008(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: movl -336(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %eax -; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -172(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -832(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -672(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -836(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -472(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -840(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -436(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -844(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -176(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -680(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -856(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -268(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -852(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -848(%ebp) # 4-byte Spill -; X32-NEXT: movl -44(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: pushl %eax @@ -694,22 +694,22 @@ ; X32-NEXT: popl %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -860(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, -864(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -172(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -868(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -684(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -876(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -472(%ebp), %ebx # 4-byte Reload @@ -719,27 +719,27 @@ ; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -880(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, -888(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -688(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -900(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -268(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -896(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -904(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 68(%eax), %eax -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi @@ -773,14 +773,14 @@ ; X32-NEXT: movl -480(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -692(%ebp) # 4-byte Spill -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl -652(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movl %esi, -908(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -916(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: movl %esi, -912(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload @@ -788,9 +788,9 @@ ; X32-NEXT: movl %esi, -696(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -652(%ebp) # 4-byte Spill -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -924(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -928(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: movl %ecx, %eax @@ -832,21 +832,21 @@ ; X32-NEXT: movl -660(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, -940(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, -944(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: movl %edi, -936(%ebp) # 4-byte Spill ; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %ebx, %edi ; X32-NEXT: movl %edi, -708(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -660(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -952(%ebp) # 4-byte Spill -; X32-NEXT: adcl -56(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -956(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 84(%eax), %eax @@ -883,15 +883,15 @@ ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: movl %esi, %edx ; X32-NEXT: movl %edx, -728(%ebp) # 4-byte Spill -; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -712(%ebp) # 4-byte Spill ; X32-NEXT: movl -668(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -276(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -968(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -964(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -972(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 68(%eax), %eax @@ -927,36 +927,36 @@ ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl %edx, %eax -; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -716(%ebp) # 4-byte Spill ; X32-NEXT: movl -664(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi ; X32-NEXT: adcl -276(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -988(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %esi -; X32-NEXT: adcl -240(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -984(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %esi -; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -980(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl %esi, -720(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -664(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: movl %edi, -996(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: movl %ecx, -1000(%ebp) # 4-byte Spill ; X32-NEXT: movl -524(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax ; X32-NEXT: movl -528(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -1004(%ebp) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax @@ -970,70 +970,70 @@ ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -236(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -80(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl %edi, -72(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, -84(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, -72(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: setb %dl -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movzbl %dl, %edx ; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: movl %edx, -608(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -28(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -164(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -120(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -60(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -52(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -56(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -160(%ebp) # 4-byte Spill ; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 44(%eax), %eax -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -1044,7 +1044,7 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl -364(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl %esi, %ecx ; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill @@ -1054,89 +1054,89 @@ ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl %ecx, %ebx -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -320(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -400(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %edi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl %esi, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -108(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -80(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: addl -364(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -60(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movzbl -88(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -56(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl -132(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -112(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -272(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -24(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -320(%ebp), %edx # 4-byte Reload +; X32-NEXT: addl -164(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -108(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -268(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -64(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -20(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill +; X32-NEXT: adcl -56(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -456(%ebp), %ecx # 4-byte Reload ; X32-NEXT: pushl %eax ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf ; X32-NEXT: popl %eax -; X32-NEXT: adcl -72(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -84(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -608(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -72(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -760(%ebp) # 4-byte Spill -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -756(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -752(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -748(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -744(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 12(%eax), %eax -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %edx @@ -1145,20 +1145,20 @@ ; X32-NEXT: movl %edi, %esi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl -584(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %edi, %ecx ; X32-NEXT: movl -432(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movl %esi, -432(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -456(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %edi, -432(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -276(%ebp), %edi # 4-byte Reload @@ -1168,45 +1168,45 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: setb %bl -; X32-NEXT: addl -160(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -24(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -20(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -112(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -180(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -184(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %edi, %edx -; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -172(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -20(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -272(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -24(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movl %ebx, -584(%ebp) # 4-byte Spill ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, -276(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl -112(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -172(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -272(%ebp) # 4-byte Spill +; X32-NEXT: adcl -108(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -736(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -432(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -456(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, -232(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edx, -164(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl %ebx, -236(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edx, -160(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 44(%eax), %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebx @@ -1216,7 +1216,7 @@ ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl -304(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %esi @@ -1225,88 +1225,87 @@ ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: addl %edx, %esi ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl -176(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl -376(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edx ; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %edi, -376(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl -224(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl %edx, %eax ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %dl ; X32-NEXT: addl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movzbl %dl, %edx ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %ebx +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -176(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -200(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -208(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl -472(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -372(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -292(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -40(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -740(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf ; X32-NEXT: movl -376(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -432(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -456(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl %ebx, -584(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -200(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -208(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, -240(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, -168(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -640(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %eax ; X32-NEXT: addb $127, %al ; X32-NEXT: sahf -; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -60(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -640(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -48(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -472(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -436(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 60(%eax), %eax -; X32-NEXT: movl %eax, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx @@ -1317,10 +1316,10 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx @@ -1328,17 +1327,17 @@ ; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -1350,7 +1349,7 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload @@ -1361,30 +1360,30 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -68(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -764(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -40(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -44(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -48(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload @@ -1392,136 +1391,135 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -80(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -16(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill -; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -20(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -24(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -36(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -44(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill ; X32-NEXT: adcl -420(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl -616(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl -612(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -268(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %edi, %ebx +; X32-NEXT: addl -124(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload +; X32-NEXT: setb -124(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -152(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -124(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -424(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, -424(%ebp) # 4-byte Spill @@ -1539,103 +1537,102 @@ ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -320(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -424(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -420(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -48(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -44(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -48(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -364(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -68(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -32(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -196(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -200(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -504(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -508(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -48(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -40(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -64(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -268(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx @@ -1643,121 +1640,123 @@ ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -416(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -68(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl -68(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -296(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -768(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -40(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -60(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: addl -40(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 28(%eax), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -372(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -124(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -68(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -372(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl -68(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -372(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -1767,48 +1766,48 @@ ; X32-NEXT: adcl -776(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -772(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -780(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, -508(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -504(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %ecx, -124(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -48(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl -76(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl -268(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl -16(%ebp), %ecx # 1-byte Folded Reload @@ -1816,51 +1815,50 @@ ; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -440(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -292(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl -80(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -88(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -332(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -448(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -268(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -1868,7 +1866,7 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax @@ -1880,105 +1878,105 @@ ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -76(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -80(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -76(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl -268(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -16(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -332(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl -648(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl -644(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -332(%ebp) # 4-byte Spill ; X32-NEXT: adcl -572(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill ; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -80(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -296(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -24(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: movl -332(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: addl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -36(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -372(%ebp) # 4-byte Spill ; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl -760(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -296(%ebp) # 4-byte Spill ; X32-NEXT: movl -756(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %ecx, -268(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl -752(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl -748(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -332(%ebp) # 4-byte Spill ; X32-NEXT: movl -744(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 28(%eax), %eax @@ -1993,7 +1991,7 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload @@ -2003,23 +2001,23 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload @@ -2029,31 +2027,31 @@ ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -228(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -232(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -428(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -52(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -64(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload @@ -2063,78 +2061,78 @@ ; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -428(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -48(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -64(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -44(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -48(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -24(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -20(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -64(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -228(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: addl -232(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl -596(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill ; X32-NEXT: adcl -536(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %esi @@ -2145,29 +2143,29 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -2181,7 +2179,7 @@ ; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -536(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax @@ -2189,15 +2187,15 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -344(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -232(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill @@ -2215,122 +2213,122 @@ ; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -452(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -536(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -596(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -228(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -232(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -32(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -36(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -228(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -232(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -344(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -404(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -532(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -592(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: addl -572(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl -448(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -196(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -200(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -572(%ebp) # 4-byte Spill ; X32-NEXT: adcl -428(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -464(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -68(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -228(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -232(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -180(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -2343,7 +2341,7 @@ ; X32-NEXT: movl %eax, -532(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax @@ -2355,9 +2353,9 @@ ; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill ; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -228(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -232(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx @@ -2370,22 +2368,22 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -48(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload @@ -2394,43 +2392,43 @@ ; X32-NEXT: adcl %eax, -452(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -196(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -200(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -232(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -448(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -196(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -200(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb -328(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl -328(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl -228(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -200(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -448(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -2440,22 +2438,22 @@ ; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -788(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -784(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, -592(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -532(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -572(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -428(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -452(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -20(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -196(%ebp) # 4-byte Spill +; X32-NEXT: addl -24(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill +; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -620(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %eax # 4-byte Folded Reload @@ -2463,10 +2461,10 @@ ; X32-NEXT: setb -464(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -2474,47 +2472,47 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -48(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -44(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -48(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -368(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax @@ -2522,18 +2520,18 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -540(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -576(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -576(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -368(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -2541,140 +2539,140 @@ ; X32-NEXT: addl -368(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -184(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %edi ; X32-NEXT: movl -576(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -368(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -64(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -48(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: setb -576(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -24(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -20(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -280(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: movzbl -576(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -540(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl -800(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl -796(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl -792(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -228(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -232(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -68(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -368(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -328(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movzbl -464(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: addl -344(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -164(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -56(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -72(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -64(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -160(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -616(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl -68(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -612(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -424(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl -420(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -368(%ebp) # 4-byte Spill ; X32-NEXT: adcl -508(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: adcl -504(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill +; X32-NEXT: adcl -124(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: adcl -60(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -292(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -372(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -268(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -332(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -2682,11 +2680,11 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload @@ -2695,34 +2693,34 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -72(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax @@ -2730,18 +2728,18 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -468(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -804(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -160(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -2749,134 +2747,134 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -160(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -164(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -232(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -236(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -40(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -32(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -40(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -32(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -76(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -72(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -84(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -468(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl -816(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill ; X32-NEXT: adcl -812(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -344(%ebp) # 4-byte Spill ; X32-NEXT: adcl -808(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -468(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl -468(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -124(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax @@ -2884,15 +2882,15 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -512(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -820(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -196(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -200(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill ; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -60(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -84(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill @@ -2903,159 +2901,159 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -404(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -328(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -200(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -468(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -64(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -60(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -84(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -200(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -64(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -60(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -404(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: setb -404(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl -404(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -72(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -196(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -200(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -512(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -676(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -624(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -628(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -152(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -124(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: adcl -160(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -628(%ebp) # 4-byte Spill -; X32-NEXT: adcl -232(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -236(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -624(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -344(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -232(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -236(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -140(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -140(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -540(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload +; X32-NEXT: setb -200(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -196(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -200(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -588(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -824(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -160(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -232(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -424(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -232(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl $0, -124(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -236(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -3066,59 +3064,59 @@ ; X32-NEXT: movl 60(%eax), %esi ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -160(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl -420(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -232(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -424(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -196(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -152(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -232(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -124(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -236(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -236(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -424(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -152(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -124(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -124(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -124(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -152(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -124(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx @@ -3126,32 +3124,32 @@ ; X32-NEXT: addl %ecx, %edx ; X32-NEXT: movl -420(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -232(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -236(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -588(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -632(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -828(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -636(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -404(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -540(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -628(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -232(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -624(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -196(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -56(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -52(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -424(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -420(%ebp) # 4-byte Spill ; X32-NEXT: adcl -344(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -636(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -632(%ebp) # 4-byte Spill ; X32-NEXT: setb -588(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %ebx # 4-byte Reload @@ -3159,7 +3157,7 @@ ; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -3167,11 +3165,11 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload @@ -3181,52 +3179,52 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -64(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload +; X32-NEXT: setb -60(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -64(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -60(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -672(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -832(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -72(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -344(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -3234,206 +3232,206 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -344(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -84(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -60(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -344(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -40(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -32(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -76(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -76(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -72(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -344(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -672(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl -836(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill ; X32-NEXT: adcl -840(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl -844(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -232(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -424(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -152(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl -420(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -636(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -64(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -632(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl -588(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: movl %ebx, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl -56(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -52(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill ; X32-NEXT: addl -512(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl -676(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill ; X32-NEXT: adcl -432(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl -456(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -60(%ebp) # 4-byte Spill ; X32-NEXT: movl -344(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -584(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -276(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -172(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -32(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -272(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -168(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edx, -508(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -68(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -504(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -328(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -368(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -468(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -48(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -404(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, -540(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl %edx, -232(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -464(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl %edx, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -236(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -72(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -64(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -292(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl -372(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -124(%ebp) # 4-byte Spill +; X32-NEXT: adcl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl -296(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -60(%ebp) # 4-byte Spill +; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -332(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill +; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: setb -372(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -240(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -440(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -36(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -44(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax @@ -3449,29 +3447,29 @@ ; X32-NEXT: addl -680(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -884(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -276(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -168(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload @@ -3479,44 +3477,44 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -172(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -172(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -168(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: adcl -168(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -168(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -172(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -168(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload @@ -3524,68 +3522,68 @@ ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -80(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl -76(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -24(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -680(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl -856(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl -852(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -292(%ebp) # 4-byte Spill ; X32-NEXT: adcl -848(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -148(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %edi -; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -432(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -88(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi @@ -3594,7 +3592,7 @@ ; X32-NEXT: movl %eax, -456(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax @@ -3602,18 +3600,18 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -656(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -892(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -3621,56 +3619,56 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -20(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -88(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -32(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -36(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload @@ -3678,50 +3676,50 @@ ; X32-NEXT: adcl -128(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -24(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -656(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -700(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -860(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -864(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -272(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill +; X32-NEXT: addl -268(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill ; X32-NEXT: adcl -276(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -332(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -368(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -292(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -168(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx @@ -3729,9 +3727,9 @@ ; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -296(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ebx # 4-byte Reload @@ -3739,22 +3737,22 @@ ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax @@ -3764,13 +3762,13 @@ ; X32-NEXT: adcl -868(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -276(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -512(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -296(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill @@ -3781,20 +3779,20 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx @@ -3802,38 +3800,38 @@ ; X32-NEXT: movl -512(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -276(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb -512(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -32(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -36(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl -68(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi @@ -3841,7 +3839,7 @@ ; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill ; X32-NEXT: movl -296(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl -512(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -3851,66 +3849,66 @@ ; X32-NEXT: adcl -876(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -872(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -880(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, -268(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -332(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -276(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -368(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -80(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -76(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -296(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl -292(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -168(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -168(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -172(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -168(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -336(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -392(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -176(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -172(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -412(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: movl %esi, -336(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill ; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -3918,11 +3916,11 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -332(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload @@ -3933,59 +3931,59 @@ ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl -688(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -888(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -36(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -44(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -332(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -336(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -20(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull -164(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -160(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, -44(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -332(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -24(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -336(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -132(%ebp) # 4-byte Spill -; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -140(%ebp) # 4-byte Spill -; X32-NEXT: setb -176(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill +; X32-NEXT: adcl -172(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -136(%ebp) # 4-byte Spill +; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -332(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -336(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -3998,21 +3996,21 @@ ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -332(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl -332(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -392(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -204(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -412(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -132(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -140(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -176(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: addl -164(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -136(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl -172(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -688(%ebp), %ebx # 4-byte Folded Reload @@ -4022,15 +4020,15 @@ ; X32-NEXT: movl %esi, -392(%ebp) # 4-byte Spill ; X32-NEXT: adcl -904(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -412(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -80(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -76(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -292(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -80(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -360(%ebp) # 4-byte Folded Spill @@ -4039,41 +4037,41 @@ ; X32-NEXT: movl -412(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -656(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -168(%ebp) # 4-byte Spill ; X32-NEXT: adcl -700(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill ; X32-NEXT: adcl -376(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -220(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -224(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -336(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -640(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -360(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -208(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -472(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -392(%ebp) # 4-byte Spill ; X32-NEXT: adcl -436(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -232(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -236(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -432(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -456(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -344(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -268(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill ; X32-NEXT: movzbl -372(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -168(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, -336(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edx @@ -4085,172 +4083,174 @@ ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -140(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -136(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -308(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -480(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -476(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %esi, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -252(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movzbl -40(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl -692(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl -32(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: addl -692(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -920(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -132(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill +; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -132(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 76(%eax), %edx -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edx +; X32-NEXT: movl 76(%eax), %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -132(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -116(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: addl %ebx, -140(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -136(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -200(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -284(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -516(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -200(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -208(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill ; X32-NEXT: movl -284(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl -224(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: adcl -204(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -224(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -204(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -224(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -204(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -308(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -172(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -200(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -692(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill ; X32-NEXT: adcl -908(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl -916(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -68(%ebp) # 4-byte Spill ; X32-NEXT: adcl -912(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload @@ -4258,15 +4258,15 @@ ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -480(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi @@ -4279,11 +4279,11 @@ ; X32-NEXT: addl -436(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload @@ -4294,26 +4294,26 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -696(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -932(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -224(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -72(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -204(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -436(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -72(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -84(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax @@ -4327,92 +4327,92 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -436(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -472(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -516(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -72(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -72(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -84(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -104(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl -484(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -488(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -224(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl -204(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -76(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax ; X32-NEXT: addl -696(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -652(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -924(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl -928(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl -64(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill -; X32-NEXT: adcl -220(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -60(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill +; X32-NEXT: adcl -224(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -124(%ebp) # 4-byte Spill +; X32-NEXT: adcl -32(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -208(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -68(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -40(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -544(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax @@ -4421,25 +4421,25 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -380(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -356(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edi @@ -4447,42 +4447,42 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -92(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -56(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -52(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -704(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -948(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -140(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -220(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl $0, -224(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -60(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -580(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -140(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: movl 92(%eax), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload @@ -4492,35 +4492,35 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -28(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl -292(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -32(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -376(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -220(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -224(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: setb -376(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -580(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -220(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -292(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -140(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl -292(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: setb -292(%ebp) # 1-byte Folded Spill @@ -4529,15 +4529,15 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl -292(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -60(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -224(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: movzbl -376(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi @@ -4546,34 +4546,34 @@ ; X32-NEXT: adcl -940(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -944(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -936(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -32(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -176(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill +; X32-NEXT: addl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -60(%ebp) # 4-byte Spill +; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -152(%ebp) # 4-byte Spill -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill +; X32-NEXT: adcl -36(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -124(%ebp) # 4-byte Spill +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -548(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx @@ -4584,29 +4584,29 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -380(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -356(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: movl %esi, -380(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl %ecx, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -4617,10 +4617,10 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -252(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax @@ -4628,32 +4628,32 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -708(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -960(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -176(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -172(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -376(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: adcl -208(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -380(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -356(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -580(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %edi, %ecx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -140(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edi, %ebx ; X32-NEXT: addl %esi, %eax @@ -4661,28 +4661,28 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -376(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -380(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -116(%ebp) # 4-byte Spill ; X32-NEXT: adcl -356(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill ; X32-NEXT: setb -356(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -580(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -380(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -4695,20 +4695,20 @@ ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb -380(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl -380(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -600(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -604(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -224(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -204(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl -116(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -84(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl -356(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx @@ -4716,117 +4716,117 @@ ; X32-NEXT: adcl -660(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -952(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -956(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -76(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -72(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -72(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -84(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -32(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, -172(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -36(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %edi, -380(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -516(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -116(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl -116(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -484(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -488(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -116(%ebp) # 4-byte Spill ; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -220(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -100(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -480(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -212(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl -356(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -84(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %esi, -176(%ebp) # 4-byte Spill +; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -212(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -116(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -476(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -32(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -36(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -480(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload @@ -4834,42 +4834,42 @@ ; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -356(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -204(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -32(%ebp) # 4-byte Spill +; X32-NEXT: adcl -212(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -36(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -88(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -480(%ebp) # 4-byte Spill ; X32-NEXT: adcl -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -384(%ebp) # 4-byte Spill -; X32-NEXT: setb -204(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -212(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -176(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -112(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -108(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -84(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -88(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -484(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload @@ -4880,8 +4880,8 @@ ; X32-NEXT: movl -480(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -116(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -384(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -100(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -204(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, -176(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -212(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx @@ -4889,34 +4889,34 @@ ; X32-NEXT: movl -548(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %edi, %ecx ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx ; X32-NEXT: imull -544(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl -580(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -148(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -192(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -236(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: addl -212(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -240(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl %edi, %esi ; X32-NEXT: movl -548(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -4928,48 +4928,48 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -84(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill ; X32-NEXT: movl -476(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %edx -; X32-NEXT: imull -248(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: imull -252(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -248(%ebp) # 4-byte Spill ; X32-NEXT: movl -516(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi ; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %eax, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -148(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill +; X32-NEXT: adcl -248(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -164(%ebp) # 4-byte Spill ; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi @@ -4977,28 +4977,28 @@ ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl -300(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -248(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb -244(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -248(%ebp) # 4-byte Folded Reload +; X32-NEXT: setb -248(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -244(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -248(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl -128(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -132(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -236(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -164(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -240(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -212(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -88(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -116(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -148(%ebp) # 4-byte Spill -; X32-NEXT: adcl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill +; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill ; X32-NEXT: adcl -484(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -384(%ebp) # 4-byte Spill ; X32-NEXT: adcl -488(%ebp), %edx # 4-byte Folded Reload @@ -5006,13 +5006,13 @@ ; X32-NEXT: movl 8(%ebp), %esi ; X32-NEXT: movl 104(%esi), %ebx ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, -244(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %ebx, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl 108(%esi), %eax -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -5023,93 +5023,93 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb -116(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ebx, %edi ; X32-NEXT: movzbl -116(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill ; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -256(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -136(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: movl 96(%ecx), %edi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -164(%ebp) # 4-byte Spill ; X32-NEXT: movl 100(%ecx), %eax ; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl -132(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -164(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb -144(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -140(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -116(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movzbl -144(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -140(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: addl -236(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -240(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -28(%ebp) # 4-byte Spill -; X32-NEXT: adcl -204(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -112(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -140(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -136(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -212(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl -116(%ebp), %eax # 4-byte Reload @@ -5117,41 +5117,41 @@ ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -192(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -236(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -204(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -212(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -112(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -188(%ebp) # 4-byte Spill -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -108(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -192(%ebp) # 4-byte Spill +; X32-NEXT: setb -108(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -256(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -256(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -148(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -96(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax @@ -5160,40 +5160,40 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -248(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -128(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -28(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -256(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -112(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -148(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -108(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl %edi, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -252(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: movl 112(%ecx), %eax -; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill ; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl 116(%ecx), %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: imull %eax, %edi ; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl %edi, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl 120(%ecx), %eax ; X32-NEXT: movl %ecx, %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %esi, %edi -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %edx @@ -5201,28 +5201,28 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: imull %eax, %ebx ; X32-NEXT: addl %edx, %ebx -; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, -96(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -108(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -104(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -140(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -104(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: mull -100(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -104(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -100(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -5230,34 +5230,34 @@ ; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: movl %edx, -96(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %edi ; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %edx ; X32-NEXT: imull -116(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -284(%ebp) # 4-byte Spill -; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: movl -212(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %edi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -104(%ebp) # 4-byte Spill +; X32-NEXT: addl -100(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -100(%ebp) # 4-byte Spill ; X32-NEXT: adcl -284(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, -284(%ebp) # 4-byte Spill @@ -5274,62 +5274,62 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull -116(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl -104(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -100(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: addl -100(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -176(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -108(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -168(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -152(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -92(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -96(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -28(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, %edi -; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -148(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: adcl -248(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -252(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill ; X32-NEXT: adcl -128(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -256(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl -304(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -64(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -220(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -236(%ebp), %edx # 4-byte Reload +; X32-NEXT: addl -60(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -164(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -224(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -240(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl -356(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -204(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -32(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -148(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -144(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, -284(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -160(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -384(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -176(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -300(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -72(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill -; X32-NEXT: adcl -176(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -204(%ebp) # 4-byte Spill -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -84(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -164(%ebp) # 4-byte Spill +; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -240(%ebp) # 4-byte Spill +; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -284(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl -380(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -256(%ebp) # 4-byte Spill +; X32-NEXT: adcl -228(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -176(%ebp) # 4-byte Spill ; X32-NEXT: movl -492(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -28(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 92(%eax), %eax @@ -5340,11 +5340,11 @@ ; X32-NEXT: addl -28(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload @@ -5353,65 +5353,65 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -556(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -560(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, -92(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -28(%ebp) # 4-byte Spill ; X32-NEXT: movl -552(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -168(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -152(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -460(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -460(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl -152(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -712(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -976(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -108(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: adcl -104(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill +; X32-NEXT: addl -104(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill +; X32-NEXT: adcl -100(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -92(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -28(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -552(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl -552(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload @@ -5420,25 +5420,25 @@ ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -524(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -528(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -108(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -104(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -100(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: addl -92(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -28(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: setb -28(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -492(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx @@ -5446,10 +5446,10 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill ; X32-NEXT: movl -492(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl -92(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: setb -92(%ebp) # 1-byte Folded Spill @@ -5459,46 +5459,46 @@ ; X32-NEXT: movzbl -92(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -556(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -560(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -212(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -228(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -216(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: movzbl -28(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -712(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill ; X32-NEXT: adcl -968(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl -964(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -248(%ebp) # 4-byte Spill ; X32-NEXT: adcl -972(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -248(%ebp) # 4-byte Spill -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ebx, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl -388(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 76(%eax), %eax ; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill -; X32-NEXT: mull %ecx +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -168(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload @@ -5507,31 +5507,31 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -564(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -132(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -180(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -308(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -312(%ebp) # 4-byte Spill ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -152(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -444(%ebp), %edi # 4-byte Reload @@ -5543,17 +5543,17 @@ ; X32-NEXT: addl -716(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -992(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -92(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -252(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -156(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -244(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -180(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -5561,7 +5561,7 @@ ; X32-NEXT: addl -92(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax @@ -5574,91 +5574,91 @@ ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -500(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -496(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -252(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -244(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -92(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -156(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -48(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -48(%ebp) # 1-byte Folded Spill +; X32-NEXT: addl -180(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -180(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -180(%ebp) # 4-byte Spill ; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -156(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -156(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -180(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -156(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl -180(%ebp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl -564(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -156(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -264(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -100(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -48(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -40(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -716(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -988(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -984(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -980(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -148(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill +; X32-NEXT: addl -144(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill ; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -108(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -144(%ebp) # 4-byte Spill -; X32-NEXT: adcl -104(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -188(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -212(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -208(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -244(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -104(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -140(%ebp) # 4-byte Spill +; X32-NEXT: adcl -100(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -192(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -216(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -248(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -348(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -252(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -388(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl -100(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload @@ -5667,19 +5667,19 @@ ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -564(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -180(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -184(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -348(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -5687,33 +5687,33 @@ ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb -108(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -444(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -112(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl -108(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -720(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl -1008(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -108(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -104(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -300(%ebp) # 4-byte Spill -; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -112(%ebp) # 4-byte Spill +; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -108(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, -128(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -148(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -144(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -5725,7 +5725,7 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload @@ -5736,23 +5736,23 @@ ; X32-NEXT: movl -500(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl -496(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl -300(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: addl -128(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill +; X32-NEXT: adcl -144(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: setb -108(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -288(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx @@ -5775,47 +5775,47 @@ ; X32-NEXT: movl -564(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: movl -568(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -148(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -144(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx ; X32-NEXT: movl -128(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -112(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -108(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl -720(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -664(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -996(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -1000(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -156(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, -180(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -100(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -108(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -212(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -228(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill +; X32-NEXT: adcl -216(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -248(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -144(%ebp) # 4-byte Spill -; X32-NEXT: setb -100(%ebp) # 1-byte Folded Spill +; X32-NEXT: adcl -248(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -252(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -140(%ebp) # 4-byte Spill +; X32-NEXT: setb -148(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -492(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi @@ -5823,32 +5823,32 @@ ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -216(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -248(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -252(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -248(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -252(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -556(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -560(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -184(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -320(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -316(%ebp) # 4-byte Spill ; X32-NEXT: movl -552(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -5856,43 +5856,43 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -216(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -188(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -192(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl -188(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -192(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl -724(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -1004(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -212(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -208(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -180(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -320(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl -228(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl -216(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: adcl $0, -184(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -316(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -552(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -216(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload @@ -5903,31 +5903,31 @@ ; X32-NEXT: movl -524(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -528(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, -212(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -216(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -180(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -184(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -524(%ebp) # 4-byte Spill -; X32-NEXT: adcl -320(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -528(%ebp) # 4-byte Spill -; X32-NEXT: setb -180(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -184(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -492(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -288(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -320(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -316(%ebp) # 4-byte Spill ; X32-NEXT: movl -96(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -188(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -192(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload @@ -5935,39 +5935,39 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -188(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -192(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -188(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -192(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -556(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -560(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -308(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -320(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl -316(%ebp), %edx # 4-byte Reload ; X32-NEXT: addl -524(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -528(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -180(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -184(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl -724(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -668(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl -732(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl -728(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -248(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -128(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -244(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -212(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -100(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: addl %eax, -252(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -128(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -248(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -228(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, -216(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -148(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -320(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -316(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %edi, -300(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi @@ -5976,205 +5976,204 @@ ; X32-NEXT: movl %ecx, -560(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %ebx ; X32-NEXT: movl 96(%ebx), %ecx -; X32-NEXT: movl %ecx, -312(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -148(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 100(%ebx), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -148(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb -280(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: movzbl -280(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edi # 4-byte Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edi, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -192(%ebp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -308(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -384(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -132(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: adcl -260(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl -180(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -148(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -184(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -144(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -280(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -188(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -144(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -192(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -140(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 104(%eax), %ecx -; X32-NEXT: movl %ecx, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ecx, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl -128(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 108(%eax), %edx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl 108(%eax), %ebx +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebx, -108(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb -176(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %esi, %edi -; X32-NEXT: movzbl -176(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl -172(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %edi, %esi ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -148(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, -144(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, -128(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -188(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -136(%ebp) # 4-byte Spill -; X32-NEXT: adcl -144(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill -; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl -192(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -132(%ebp) # 4-byte Spill +; X32-NEXT: adcl -140(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill +; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl %ebx, %esi ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -140(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -112(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -108(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -144(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -140(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -108(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -144(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -140(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -176(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -172(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -280(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -264(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill -; X32-NEXT: movzbl -84(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -132(%ebp) # 4-byte Spill +; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -156(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -264(%ebp) # 4-byte Spill ; X32-NEXT: movl -348(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -184(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -216(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: imull -220(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -184(%ebp) # 4-byte Spill ; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -308(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl -264(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -180(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -260(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill +; X32-NEXT: adcl -184(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload @@ -6188,93 +6187,92 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -220(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -84(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -348(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %edx -; X32-NEXT: movl 124(%edx), %ecx -; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl 12(%ebp), %edi +; X32-NEXT: movl 124(%edi), %ecx +; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl 120(%edx), %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl 120(%edi), %esi ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -124(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: imull -120(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl 112(%edi), %ebx ; X32-NEXT: movl 116(%edi), %ecx ; X32-NEXT: movl %ecx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %edi ; X32-NEXT: imull %ecx, %edi ; X32-NEXT: mull %ebx ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -60(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -216(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: addl -220(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -312(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl -312(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull -124(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -120(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb -260(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -256(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -124(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull -120(%ebp) # 4-byte Folded Reload ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -260(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl -256(%ebp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl -184(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -60(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl -188(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl -56(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -288(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -260(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -348(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -184(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -216(%ebp) # 4-byte Spill -; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -264(%ebp) # 4-byte Spill -; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill -; X32-NEXT: adcl -268(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %esi, -220(%ebp) # 4-byte Spill +; X32-NEXT: adcl -132(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -260(%ebp) # 4-byte Spill +; X32-NEXT: adcl -156(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: adcl -264(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -288(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx @@ -6285,10 +6283,10 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx @@ -6298,53 +6296,53 @@ ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl -496(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -124(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl -120(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -444(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -500(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -320(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -496(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -400(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl -60(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl -56(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -500(%ebp) # 4-byte Spill -; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -132(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -496(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -160(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, -156(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -416(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi @@ -6357,54 +6355,54 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -136(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb -132(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -136(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -132(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -564(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -568(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl -500(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -60(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, -56(%ebp) # 4-byte Folded Spill ; X32-NEXT: adcl -496(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -132(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -160(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -324(%ebp) # 4-byte Spill +; X32-NEXT: addl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -320(%ebp) # 4-byte Spill ; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -400(%ebp) # 4-byte Spill -; X32-NEXT: setb -160(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -156(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -268(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl -264(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull -28(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -260(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb -256(%ebp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -260(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl -256(%ebp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl -364(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl -564(%ebp), %esi # 4-byte Folded Reload @@ -6412,11 +6410,11 @@ ; X32-NEXT: adcl -568(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -320(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -16(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -268(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -160(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, -264(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl -156(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, -364(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx @@ -6439,18 +6437,18 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi ; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill ; X32-NEXT: adcl -388(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -192(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -196(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -320(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -6462,7 +6460,7 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %bl ; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload @@ -6472,8 +6470,8 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -340(%ebp) # 4-byte Spill -; X32-NEXT: adcl -192(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill +; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill ; X32-NEXT: movl -416(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload ; X32-NEXT: imull %eax, %edi @@ -6482,7 +6480,7 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill ; X32-NEXT: addl %edi, %edx -; X32-NEXT: imull -316(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: imull -324(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, -492(%ebp) # 4-byte Spill ; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload @@ -6492,13 +6490,13 @@ ; X32-NEXT: movl -552(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -120(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %ecx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill ; X32-NEXT: adcl -492(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx @@ -6510,11 +6508,11 @@ ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl -552(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload @@ -6524,135 +6522,135 @@ ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: addl -96(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -120(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -28(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -260(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl -320(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl -340(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -192(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl -196(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: addl -16(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill +; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edx ; X32-NEXT: adcl -364(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -396(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl -160(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl -120(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -384(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -144(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl -128(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -216(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -220(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -260(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl -184(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload ; X32-NEXT: adcl -288(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; X32-NEXT: addl -248(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill +; X32-NEXT: addl -252(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: adcl -248(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -56(%ebp) # 4-byte Spill +; X32-NEXT: adcl -216(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -132(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl -316(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -300(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -160(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -556(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -560(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -344(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -436(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl -344(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl -436(%ebp), %esi # 4-byte Folded Reload ; X32-NEXT: movl -92(%ebp), %ebx # 4-byte Reload ; X32-NEXT: adcl -472(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -88(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -80(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -40(%ebp) # 4-byte Spill +; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -132(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl -212(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill ; X32-NEXT: adcl -284(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -156(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl -136(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -116(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl -16(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -432(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -168(%ebp) # 4-byte Spill -; X32-NEXT: adcl -456(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -308(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -252(%ebp) # 4-byte Spill +; X32-NEXT: addl -432(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, -312(%ebp) # 4-byte Spill +; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -456(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -152(%ebp) # 4-byte Spill +; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, -244(%ebp) # 4-byte Spill ; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl -64(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -92(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -156(%ebp) # 4-byte Spill +; X32-NEXT: adcl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl -40(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl -276(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -272(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -276(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -240(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -172(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -20(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl -168(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl -120(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -76(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl -56(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -44(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl -24(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %ecx # 4-byte Reload ; X32-NEXT: adcl -336(%ebp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill ; X32-NEXT: adcl -360(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: adcl -392(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl -412(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill @@ -6685,36 +6683,36 @@ ; X32-NEXT: movl %esi, 48(%ecx) ; X32-NEXT: movl -540(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 52(%ecx) -; X32-NEXT: movl -228(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -232(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 56(%ecx) -; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl -200(%ebp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 60(%ecx) -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 64(%ecx) -; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 68(%ecx) -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 72(%ecx) ; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 76(%ecx) -; X32-NEXT: movl -156(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 80(%ecx) -; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 84(%ecx) ; X32-NEXT: movl %ebx, 88(%ecx) -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 92(%ecx) ; X32-NEXT: movl %edi, 96(%ecx) -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 100(%ecx) -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 104(%ecx) -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 108(%ecx) ; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 112(%ecx) ; X32-NEXT: movl %edx, 116(%ecx) -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 120(%ecx) ; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 124(%ecx) @@ -6764,17 +6762,16 @@ ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %r12 -; X64-NEXT: movq %r11, %r8 -; X64-NEXT: addq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: movq %r9, (%rsp) # 8-byte Spill -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rbp, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: addq %rbp, %rcx +; X64-NEXT: movq %rcx, %r12 +; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbx, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: xorl %ebp, %ebp @@ -6784,38 +6781,37 @@ ; X64-NEXT: movq 8(%rsi), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rbp -; X64-NEXT: xorl %r11d, %r11d +; X64-NEXT: xorl %r9d, %r9d ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rcx, %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rdi, %r15 ; X64-NEXT: adcq %rcx, %rbp -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%rsi), %rax ; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdi, %r14 -; X64-NEXT: addq %rax, %r14 -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq %rbp, %r14 -; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: addq %rax, %r9 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: adcq %rdx, %rax +; X64-NEXT: addq %rbp, %r9 +; X64-NEXT: adcq %rbx, %rax +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %r9, %rax -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rcx, %r8 +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq (%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: xorl %r8d, %r8d @@ -6823,44 +6819,44 @@ ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq 32(%r13), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %r8 -; X64-NEXT: xorl %r8d, %r8d +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: movq %rbx, %r8 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: addq %r9, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r15, %rax +; X64-NEXT: movq %r11, %rax +; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r14, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r11, %rax +; X64-NEXT: adcq %r15, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %rdi +; X64-NEXT: movq %r12, %rax +; X64-NEXT: adcq %r9, %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbp, %r14 +; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbp, %rdi ; X64-NEXT: movq 8(%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rsi, %r11 +; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %rsi, %r12 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rcx, %r11 +; X64-NEXT: addq %r8, %r12 ; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb %bl @@ -6869,92 +6865,91 @@ ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%r10), %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: addq %rax, %r8 +; X64-NEXT: movq %r8, %rcx +; X64-NEXT: addq %rax, %rcx ; X64-NEXT: movq %rsi, %r10 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: addq %rbp, %r8 -; X64-NEXT: movq %r8, %rax +; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r10 -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: addq %r9, %rdx +; X64-NEXT: movq %r8, %rdx +; X64-NEXT: movq %r8, %r14 +; X64-NEXT: movq %r14, (%rsp) # 8-byte Spill +; X64-NEXT: addq %r11, %rdx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r11, %r8 -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: movq %r12, %rsi +; X64-NEXT: adcq %rsi, %r15 ; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: adcq %rcx, %r9 +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: adcq %r10, %rdi ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq 40(%rsi), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: xorl %r14d, %r14d -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: addq %r9, %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq 40(%rdi), %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: xorl %r9d, %r9d +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %r13, %rdi -; X64-NEXT: adcq %r9, %rbp +; X64-NEXT: addq %r13, %rcx +; X64-NEXT: adcq %r11, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp -; X64-NEXT: movzbl %bl, %r11d -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: movq 48(%rsi), %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r14 +; X64-NEXT: movzbl %bl, %ebx +; X64-NEXT: adcq %rdx, %rbx +; X64-NEXT: movq 48(%rdi), %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r13, %rbx -; X64-NEXT: addq %rax, %rbx -; X64-NEXT: movq %r9, %rsi -; X64-NEXT: adcq %rdx, %rsi -; X64-NEXT: addq %rbp, %rbx -; X64-NEXT: adcq %r11, %rsi -; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: addq %r13, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rdi, %r8 +; X64-NEXT: movq %r13, %r12 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: movq %r11, %rdi +; X64-NEXT: adcq %rdx, %rdi +; X64-NEXT: addq %rbp, %r12 +; X64-NEXT: adcq %rbx, %rdi +; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r13, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rcx, %rsi +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r12, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbx, %rcx -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r10 +; X64-NEXT: adcq %rdi, %r10 ; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, %rax ; X64-NEXT: addq %r13, %rax -; X64-NEXT: movq (%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: adcq %r11, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, %r10 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq 56(%rax), %r11 ; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r10 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx @@ -6969,19 +6964,19 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %rdi, %r13 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r12 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r10, %rbp +; X64-NEXT: movq %r9, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -6993,65 +6988,63 @@ ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r13 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload +; X64-NEXT: addq %r14, %rsi ; X64-NEXT: adcq %r8, %r13 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r10, %rbx +; X64-NEXT: movq %r11, %rbx ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: movq 24(%rax), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq 24(%rax), %r9 ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbp, %r8 ; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: setb %bl +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %dil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %rbp -; X64-NEXT: movq (%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: adcq %r9, %rbx +; X64-NEXT: movq %r10, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: adcq %r10, %rbx ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: addq %rsi, %r10 -; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %rsi, %r14 +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %r13, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rbp @@ -7061,88 +7054,86 @@ ; X64-NEXT: setb %r15b ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r11, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r11, %rdi +; X64-NEXT: addq %r14, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rdi, %r14 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %r12 +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r9, %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: addq %r14, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: adcq %r9, %r14 +; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: adcq %r10, %r9 ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: adcq %rdx, %r9 ; X64-NEXT: addq %rbp, %r13 -; X64-NEXT: adcq %rbx, %r11 +; X64-NEXT: adcq %rbx, %r14 ; X64-NEXT: movzbl %r15b, %eax ; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %r14 +; X64-NEXT: adcq $0, %r9 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload ; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload +; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq 24(%rax), %rcx ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rbx, %rbp ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rbp, %r15 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 ; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r11, %rbp +; X64-NEXT: movq %r9, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -7154,8 +7145,8 @@ ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -7163,7 +7154,7 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq %rsi, %rbp -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx @@ -7175,11 +7166,11 @@ ; X64-NEXT: adcq %r15, %rsi ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r14 ; X64-NEXT: mulq %rdi @@ -7188,11 +7179,11 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbp, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil ; X64-NEXT: movq %r14, %rax @@ -7200,7 +7191,7 @@ ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload ; X64-NEXT: addq %r13, %rdi ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload @@ -7208,65 +7199,63 @@ ; X64-NEXT: adcq %r14, %rbp ; X64-NEXT: addq %rax, %rdi ; X64-NEXT: adcq %rdx, %rbp -; X64-NEXT: addq %rbx, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r11 +; X64-NEXT: addq %rbx, %r11 ; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rsi, %r9 +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %r8, %rdi ; X64-NEXT: adcq %r10, %rbp -; X64-NEXT: setb %r9b +; X64-NEXT: setb %r10b ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r10, %rbx +; X64-NEXT: addq %r8, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rbx, %r15 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rcx -; X64-NEXT: addq %r13, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq %rbx, %rsi -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: adcq %r14, %rsi -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %r15 +; X64-NEXT: addq %r13, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %r13 +; X64-NEXT: adcq %r14, %r13 +; X64-NEXT: addq %rax, %r15 +; X64-NEXT: adcq %rdx, %r13 ; X64-NEXT: addq %rdi, %r11 -; X64-NEXT: adcq %rbp, %r15 -; X64-NEXT: movzbl %r9b, %eax -; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: adcq %rbp, %r8 +; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq $0, %r13 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload ; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[0-9]+}}(%rsp) # 8-byte Folded Spill @@ -7277,104 +7266,106 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: movq %rcx, %rbp ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %rsi, %rcx ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: setb %cl -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq %r10, %r9 -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload ; X64-NEXT: movq %r12, %r10 ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r9 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r12, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rcx, %r12 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdi, %rbp +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq %rax, %rdi ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: addq %r14, %rbx -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: adcq %r8, %rdi ; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %rbp, %rsi -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %r12, %r11 +; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r8 ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r14, %rcx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: movq 56(%rax), %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq 56(%rax), %rsi +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rcx, %r14 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %cl ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: addq %r8, %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: adcq %r13, %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: adcq %r11, %rsi ; X64-NEXT: addq %rax, %rcx ; X64-NEXT: adcq %rdx, %rsi ; X64-NEXT: addq %rbx, %r12 -; X64-NEXT: adcq %r15, %r14 +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rdi, %r14 ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: addq %r9, %rcx @@ -7389,97 +7380,92 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r9, %rbx -; X64-NEXT: adcq $0, %r15 +; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq %r8, %rdi -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rbx, %r8 -; X64-NEXT: adcq %r15, %r9 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: adcq %r12, %r9 ; X64-NEXT: setb %bl ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %r9, %rax ; X64-NEXT: movzbl %bl, %edi ; X64-NEXT: adcq %rdi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload -; X64-NEXT: addq %r11, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: adcq %r13, %rbp -; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %rdx, %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: addq %r8, %r12 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: adcq %r11, %r10 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: adcq %rsi, %r8 +; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r15 -; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq %rax, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: adcq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: adcq %r15, {{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx -; X64-NEXT: adcq $0, %r8 -; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r10 +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rbx, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 -; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: adcq %rdx, %r15 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r10, %rdi +; X64-NEXT: movq %r14, %rdi ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -7491,80 +7477,75 @@ ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r13 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: addq %rcx, %r13 ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: adcq %r12, %r11 +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload +; X64-NEXT: adcq %r9, %r11 ; X64-NEXT: adcq $0, %r8 -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r13, %rbx -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq $0, %r15 +; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r14, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r15 ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %r8, %rcx +; X64-NEXT: addq %rbx, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: mulq %r13 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %cl -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %r13, %r9 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: addq %r13, %rsi -; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: addq %rax, %r14 ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %r13, %r9 +; X64-NEXT: movq %r9, %r13 ; X64-NEXT: adcq %r11, %r8 -; X64-NEXT: movq %r8, %r11 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: movq %r8, %r15 +; X64-NEXT: adcq $0, %r14 ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %r10, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rsi @@ -7573,66 +7554,65 @@ ; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rdi, %r10 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rdi, %r9 ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %bl +; X64-NEXT: setb %sil ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %bl, %ecx +; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r13, %rsi +; X64-NEXT: addq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: addq %r14, %r11 +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rax, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq %r15, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %rbp, %r11 -; X64-NEXT: movq %r11, (%rsp) # 8-byte Spill +; X64-NEXT: adcq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: adcq %r12, %r13 +; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r10, %r15 +; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %rax, %r11 +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq $0, %r9 +; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq 64(%r9), %r11 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq 64(%rcx), %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq 72(%rcx), %rsi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq 72(%r9), %rsi +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movq %rdx, %rsi @@ -7640,10 +7620,10 @@ ; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, %r13 +; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rsi, %rdi @@ -7654,142 +7634,139 @@ ; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: addq %rbx, %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: adcq %r14, %r15 -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %rdi, %r10 ; X64-NEXT: adcq %rcx, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %rax ; X64-NEXT: movq %r11, %rsi ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %r11, %rdi ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %r14 -; X64-NEXT: addq %r13, %rbx +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq %r8, %r14 -; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq 80(%rbp), %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq 80(%r9), %rdi +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r8, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq 88(%rbp), %r10 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq 88(%r9), %r9 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: setb %r11b -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %r12b +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rbp, %rsi -; X64-NEXT: movzbl %r11b, %eax +; X64-NEXT: movzbl %r12b, %eax ; X64-NEXT: adcq %rax, %rcx ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: addq %r9, %rbp +; X64-NEXT: movq %rdx, %r12 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rsi, %rbp -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: addq %rbx, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r11, %rax +; X64-NEXT: adcq %r12, %r13 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: adcq %rcx, %r13 +; X64-NEXT: addq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, %rbp ; X64-NEXT: adcq $0, %rax -; X64-NEXT: addq %r12, %rbp -; X64-NEXT: movq %rbp, %r8 -; X64-NEXT: adcq %r15, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: adcq $0, %r13 +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: adcq %r15, %r13 ; X64-NEXT: setb %r14b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %sil -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: addq %r11, %rsi ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq %r12, %rcx ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r8, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r8, %r15 +; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r13, %rbp +; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movzbl %r14b, %eax ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %r10 -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: imulq %rax, %r9 +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r10, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: addq %r9, %rdx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: imulq %rbp, %rdi ; X64-NEXT: addq %rdx, %rdi ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload @@ -7800,7 +7777,7 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: imulq %rcx, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r8, %r9 @@ -7808,11 +7785,11 @@ ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %rdi -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx @@ -7833,12 +7810,11 @@ ; X64-NEXT: adcq %rax, %r12 ; X64-NEXT: addq %r9, %r13 ; X64-NEXT: adcq %r8, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq 120(%rdx), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq 120(%rbp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 112(%rdx), %rsi -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 112(%rbp), %rsi ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r11 @@ -7854,7 +7830,7 @@ ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r11, %r9 @@ -7893,49 +7869,48 @@ ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq 80(%rsi), %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq 80(%r9), %rsi +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq 88(%rsi), %rax -; X64-NEXT: movq %rsi, %r9 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq 88(%r9), %r8 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r8, %rbx +; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rbx, %r14 ; X64-NEXT: adcq %rbp, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: setb %r10b +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %rbp -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq (%rsp), %r12 # 8-byte Reload ; X64-NEXT: addq %r12, %rsi ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload @@ -7948,8 +7923,8 @@ ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 72(%rdi), %r9 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq 72(%rdi), %rdi +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx @@ -7959,11 +7934,10 @@ ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %r11b -; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rbp @@ -7981,20 +7955,20 @@ ; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r8 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp @@ -8007,74 +7981,72 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload -; X64-NEXT: adcq %r13, %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: addq %r9, %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: adcq %r8, %r11 ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq (%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, (%rsp) # 8-byte Spill ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload ; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: addq %rsi, %r15 ; X64-NEXT: adcq %r10, %r11 -; X64-NEXT: setb %r10b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %r8, %rdi -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r12 +; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %r8b, %ecx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: adcq %rdi, %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: addq %r14, %rsi +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: addq %r9, %rbx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq %r13, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: adcq %r8, %rcx +; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r15, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq %r15, %rbp +; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: adcq %r11, %r12 +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload +; X64-NEXT: adcq %rax, %rbx +; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: movq 96(%rbp), %rcx ; X64-NEXT: imulq %rcx, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r12, %rsi ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rdi, %rdx @@ -8100,7 +8072,7 @@ ; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r9 ; X64-NEXT: mulq %rcx @@ -8125,32 +8097,31 @@ ; X64-NEXT: addq %r10, %rbp ; X64-NEXT: adcq %rdi, %rbx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rax, %r8 +; X64-NEXT: imulq %r13, %rsi +; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload -; X64-NEXT: imulq %r11, %rcx -; X64-NEXT: addq %rdx, %rcx -; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: imulq %r11, %r8 +; X64-NEXT: addq %rdx, %r8 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload ; X64-NEXT: imulq %r15, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: mulq %r14 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: imulq %r14, %rax +; X64-NEXT: movq %r14, %rax +; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax -; X64-NEXT: addq %r8, %r10 -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: addq %r9, %r10 +; X64-NEXT: adcq %r8, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %r14 ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r8 @@ -8174,7 +8145,7 @@ ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r10, %rax ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq %r12, %rsi ; X64-NEXT: adcq %rbp, %rax ; X64-NEXT: adcq %rbx, %rdx @@ -8184,23 +8155,23 @@ ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: movq (%rsp), %rbp # 8-byte Reload +; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, %r9 ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, %r10 -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload Index: test/CodeGen/X86/mul-i256.ll =================================================================== --- test/CodeGen/X86/mul-i256.ll +++ test/CodeGen/X86/mul-i256.ll @@ -125,11 +125,10 @@ ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl 8(%ecx), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 8(%edi), %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %esi, %ecx ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -139,9 +138,10 @@ ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl 12(%ecx), %ecx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl 12(%edi), %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %esi, %eax Index: test/CodeGen/X86/mul-i512.ll =================================================================== --- test/CodeGen/X86/mul-i512.ll +++ test/CodeGen/X86/mul-i512.ll @@ -28,7 +28,7 @@ ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %ebx ; X32-NEXT: movl %ecx, %edi ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill @@ -65,7 +65,7 @@ ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -121,8 +121,8 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload @@ -164,7 +164,7 @@ ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -214,7 +214,7 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill @@ -224,7 +224,7 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %esi @@ -287,7 +287,7 @@ ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill @@ -304,7 +304,7 @@ ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 12(%eax), %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx @@ -313,7 +313,7 @@ ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: mull (%esp) # 4-byte Folded Reload +; X32-NEXT: mull {{[0-9]+}}(%esp) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx @@ -343,7 +343,7 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax @@ -459,7 +459,7 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax @@ -498,7 +498,7 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax @@ -524,7 +524,7 @@ ; X32-NEXT: adcl %eax, %ebp ; X32-NEXT: adcl $0, %edi ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload @@ -544,7 +544,7 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx @@ -602,7 +602,7 @@ ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill @@ -611,19 +611,19 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %ebp ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -661,7 +661,7 @@ ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -793,7 +793,7 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax @@ -834,7 +834,7 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebp @@ -887,7 +887,7 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -948,7 +948,6 @@ ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, %esi ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload @@ -967,19 +966,19 @@ ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 40(%eax), %ebp -; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl 44(%ebx), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 44(%eax), %ebx ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx @@ -1023,7 +1022,7 @@ ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi @@ -1075,14 +1074,14 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: imull %eax, %esi ; X32-NEXT: addl %edx, %esi ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %edi ; X32-NEXT: adcl %ebp, %esi -; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp @@ -1106,16 +1105,15 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl 60(%edx), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 60(%edi), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl 56(%edx), %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl 56(%edi), %esi ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx @@ -1129,13 +1127,13 @@ ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx @@ -1161,7 +1159,7 @@ ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload @@ -1182,7 +1180,7 @@ ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl 44(%ecx), %ecx ; X32-NEXT: movl %ecx, %eax @@ -1217,7 +1215,7 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl 32(%esi), %edi ; X32-NEXT: movl %edi, %eax @@ -1261,17 +1259,17 @@ ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %ebp, %eax -; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi @@ -1300,28 +1298,28 @@ ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, (%esp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx @@ -1329,11 +1327,11 @@ ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload @@ -1471,7 +1469,7 @@ ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload @@ -1487,7 +1485,7 @@ ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload @@ -1515,7 +1513,7 @@ ; X32-NEXT: movl %edi, 32(%ecx) ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 36(%ecx) -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 40(%ecx) ; X32-NEXT: movl %esi, 44(%ecx) ; X32-NEXT: movl %edx, 48(%ecx) @@ -1683,14 +1681,14 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdi @@ -1699,7 +1697,7 @@ ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx @@ -1771,12 +1769,11 @@ ; X64-NEXT: adcq %rax, %r11 ; X64-NEXT: addq %r14, %r9 ; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq 56(%rdx), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq 56(%rbp), %rcx ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 48(%rdx), %rbx -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 48(%rbp), %rbx ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, %rsi Index: test/CodeGen/X86/mul128.ll =================================================================== --- test/CodeGen/X86/mul128.ll +++ test/CodeGen/X86/mul128.ll @@ -6,8 +6,8 @@ ; X64-LABEL: foo: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rax, %rcx ; X64-NEXT: mulq %r8 ; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %r8, %rsi Index: test/CodeGen/X86/mul64.ll =================================================================== --- test/CodeGen/X86/mul64.ll +++ test/CodeGen/X86/mul64.ll @@ -19,8 +19,8 @@ ; ; X64-LABEL: foo: ; X64: # %bb.0: -; X64-NEXT: imulq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rsi, %rax ; X64-NEXT: retq %k = mul i64 %t, %u ret i64 %k Index: test/CodeGen/X86/mwaitx-schedule.ll =================================================================== --- test/CodeGen/X86/mwaitx-schedule.ll +++ test/CodeGen/X86/mwaitx-schedule.ll @@ -6,22 +6,22 @@ define void @foo(i8* %P, i32 %E, i32 %H) nounwind { ; GENERIC-LABEL: foo: ; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: monitorx # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER4-LABEL: foo: ; BDVER4: # %bb.0: -; BDVER4-NEXT: leaq (%rdi), %rax ; BDVER4-NEXT: movl %esi, %ecx +; BDVER4-NEXT: leaq (%rdi), %rax ; BDVER4-NEXT: monitorx ; BDVER4-NEXT: retq ; ; ZNVER1-LABEL: foo: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: monitorx # sched: [100:?] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.monitorx(i8* %P, i32 %E, i32 %H) @@ -33,9 +33,9 @@ ; GENERIC-LABEL: bar: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pushq %rbx # sched: [5:1.00] -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] ; GENERIC-NEXT: movl %edx, %ebx # sched: [1:0.33] +; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: mwaitx # sched: [100:0.33] ; GENERIC-NEXT: popq %rbx # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -43,9 +43,9 @@ ; BDVER4-LABEL: bar: ; BDVER4: # %bb.0: ; BDVER4-NEXT: pushq %rbx -; BDVER4-NEXT: movl %edi, %ecx -; BDVER4-NEXT: movl %esi, %eax ; BDVER4-NEXT: movl %edx, %ebx +; BDVER4-NEXT: movl %esi, %eax +; BDVER4-NEXT: movl %edi, %ecx ; BDVER4-NEXT: mwaitx ; BDVER4-NEXT: popq %rbx ; BDVER4-NEXT: retq @@ -53,9 +53,9 @@ ; ZNVER1-LABEL: bar: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: pushq %rbx # sched: [1:0.50] -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] ; ZNVER1-NEXT: movl %edx, %ebx # sched: [1:0.25] +; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: mwaitx # sched: [100:?] ; ZNVER1-NEXT: popq %rbx # sched: [8:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] Index: test/CodeGen/X86/mwaitx.ll =================================================================== --- test/CodeGen/X86/mwaitx.ll +++ test/CodeGen/X86/mwaitx.ll @@ -4,8 +4,9 @@ ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=bdver4 | FileCheck %s -check-prefix=WIN64 ; CHECK-LABEL: foo: -; CHECK: leaq (%rdi), %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK-LABEL: # %bb.0: +; CHECK-DAG: leaq (%rdi), %rax +; CHECK-DAG: movl %esi, %ecx ; CHECK-NEXT: monitorx ; WIN64-LABEL: foo: ; WIN64: leaq (%rcx), %rax @@ -21,13 +22,15 @@ declare void @llvm.x86.monitorx(i8*, i32, i32) nounwind ; CHECK-LABEL: bar: -; CHECK: movl %edi, %ecx -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl %edx, %ebx +; CHECK: pushq +; CHECK-DAG: movl %edi, %ecx +; CHECK-DAG: movl %esi, %eax +; CHECK-DAG: movl %edx, %ebx ; CHECK-NEXT: mwaitx ; WIN64-LABEL: bar: -; WIN64: movl %edx, %eax -; WIN64: movl %r8d, %ebx +; WIN64: pushq +; WIN64-DAG: movl %edx, %eax +; WIN64-DAG: movl %r8d, %ebx ; WIN64-NEXT: mwaitx define void @bar(i32 %E, i32 %H, i32 %C) nounwind { entry: Index: test/CodeGen/X86/negate-i1.ll =================================================================== --- test/CodeGen/X86/negate-i1.ll +++ test/CodeGen/X86/negate-i1.ll @@ -5,9 +5,10 @@ define i8 @select_i8_neg1_or_0(i1 %a) { ; X64-LABEL: select_i8_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0: @@ -23,8 +24,9 @@ define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i8_neg1_or_0_zeroext: ; X64: # %bb.0: -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0_zeroext: @@ -39,9 +41,10 @@ define i16 @select_i16_neg1_or_0(i1 %a) { ; X64-LABEL: select_i16_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0: @@ -58,8 +61,9 @@ define i16 @select_i16_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i16_neg1_or_0_zeroext: ; X64: # %bb.0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0_zeroext: @@ -75,9 +79,9 @@ define i32 @select_i32_neg1_or_0(i1 %a) { ; X64-LABEL: select_i32_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0: @@ -93,8 +97,8 @@ define i32 @select_i32_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i32_neg1_or_0_zeroext: ; X64: # %bb.0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0_zeroext: @@ -109,10 +113,9 @@ define i64 @select_i64_neg1_or_0(i1 %a) { ; X64-LABEL: select_i64_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andl $1, %edi -; X64-NEXT: negq %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negq %rax ; X64-NEXT: retq ; ; X32-LABEL: select_i64_neg1_or_0: Index: test/CodeGen/X86/negate-shift.ll =================================================================== --- test/CodeGen/X86/negate-shift.ll +++ test/CodeGen/X86/negate-shift.ll @@ -4,8 +4,8 @@ define i32 @neg_lshr_signbit(i32 %x) { ; X64-LABEL: neg_lshr_signbit: ; X64: # %bb.0: -; X64-NEXT: sarl $31, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl $31, %eax ; X64-NEXT: retq %sh = lshr i32 %x, 31 %neg = sub i32 0, %sh @@ -15,8 +15,8 @@ define i64 @neg_ashr_signbit(i64 %x) { ; X64-LABEL: neg_ashr_signbit: ; X64: # %bb.0: -; X64-NEXT: shrq $63, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $63, %rax ; X64-NEXT: retq %sh = ashr i64 %x, 63 %neg = sub i64 0, %sh Index: test/CodeGen/X86/negate.ll =================================================================== --- test/CodeGen/X86/negate.ll +++ test/CodeGen/X86/negate.ll @@ -42,8 +42,9 @@ define i8 @negate_zero_or_minsigned(i8 %x) { ; CHECK-LABEL: negate_zero_or_minsigned: ; CHECK: # %bb.0: -; CHECK-NEXT: shlb $7, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $7, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %signbit = shl i8 %x, 7 %neg = sub i8 0, %signbit Index: test/CodeGen/X86/no-sse2-avg.ll =================================================================== --- test/CodeGen/X86/no-sse2-avg.ll +++ test/CodeGen/X86/no-sse2-avg.ll @@ -5,9 +5,9 @@ define <16 x i8> @PR27973() { ; CHECK-LABEL: PR27973: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $0, 8(%rdi) -; CHECK-NEXT: movq $0, (%rdi) ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq $0, 8(%rax) +; CHECK-NEXT: movq $0, (%rax) ; CHECK-NEXT: retq %t0 = zext <16 x i8> zeroinitializer to <16 x i32> %t1 = add nuw nsw <16 x i32> %t0, Index: test/CodeGen/X86/not-and-simplify.ll =================================================================== --- test/CodeGen/X86/not-and-simplify.ll +++ test/CodeGen/X86/not-and-simplify.ll @@ -7,9 +7,9 @@ define i32 @shrink_xor_constant1(i32 %x) { ; ALL-LABEL: shrink_xor_constant1: ; ALL: # %bb.0: -; ALL-NEXT: shrl $31, %edi -; ALL-NEXT: xorl $1, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: shrl $31, %eax +; ALL-NEXT: xorl $1, %eax ; ALL-NEXT: retq %sh = lshr i32 %x, 31 %not = xor i32 %sh, -1 @@ -34,9 +34,10 @@ define i8 @shrink_xor_constant2(i8 %x) { ; ALL-LABEL: shrink_xor_constant2: ; ALL: # %bb.0: -; ALL-NEXT: shlb $5, %dil -; ALL-NEXT: xorb $-32, %dil ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: shlb $5, %al +; ALL-NEXT: xorb $-32, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %sh = shl i8 %x, 5 %not = xor i8 %sh, -1 Index: test/CodeGen/X86/palignr.ll =================================================================== --- test/CodeGen/X86/palignr.ll +++ test/CodeGen/X86/palignr.ll @@ -167,16 +167,15 @@ ; CHECK-SSE2-LABEL: test9: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 -; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero -; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; CHECK-SSE2-NEXT: por %xmm0, %xmm1 -; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero +; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] +; CHECK-SSE2-NEXT: por %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retl ; ; CHECK-SSSE3-LABEL: test9: ; CHECK-SSSE3: # %bb.0: -; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] ; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] ; CHECK-SSSE3-NEXT: retl ; ; CHECK-AVX-LABEL: test9: Index: test/CodeGen/X86/peep-setb.ll =================================================================== --- test/CodeGen/X86/peep-setb.ll +++ test/CodeGen/X86/peep-setb.ll @@ -7,9 +7,10 @@ define i8 @test1(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -20,9 +21,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -33,9 +34,9 @@ define i64 @test3(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -46,9 +47,10 @@ define i8 @test4(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: sbbb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: sbbb $0, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -59,9 +61,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -72,9 +74,9 @@ define i64 @test6(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: sbbq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: sbbq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -85,9 +87,10 @@ define i8 @test7(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = sext i1 %cmp to i8 @@ -98,9 +101,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = sext i1 %cmp to i32 @@ -111,9 +114,9 @@ define i64 @test9(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = sext i1 %cmp to i64 Index: test/CodeGen/X86/pku.ll =================================================================== --- test/CodeGen/X86/pku.ll +++ test/CodeGen/X86/pku.ll @@ -5,9 +5,9 @@ define void @test_x86_wrpkru(i32 %src) { ; CHECK-LABEL: test_x86_wrpkru: ; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: wrpkru ; CHECK-NEXT: retq call void @llvm.x86.wrpkru(i32 %src) Index: test/CodeGen/X86/pr12360.ll =================================================================== --- test/CodeGen/X86/pr12360.ll +++ test/CodeGen/X86/pr12360.ll @@ -32,8 +32,9 @@ define zeroext i1 @f3(i1 %x) { ; CHECK-LABEL: f3: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: andb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq entry: @@ -45,9 +46,10 @@ define zeroext i1 @f4(i32 %x) { ; CHECK-LABEL: f4: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: shrl $15, %edi -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $15, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq entry: Index: test/CodeGen/X86/pr15705.ll =================================================================== --- test/CodeGen/X86/pr15705.ll +++ test/CodeGen/X86/pr15705.ll @@ -22,14 +22,14 @@ ; ; X64-LABEL: PR15705: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edx, %eax ; X64-NEXT: cmpl %esi, %edi ; X64-NEXT: je .LBB0_2 ; X64-NEXT: # %bb.1: # %if.end -; X64-NEXT: cmpl %edx, %edi +; X64-NEXT: cmpl %eax, %edi ; X64-NEXT: cmovel %ecx, %esi -; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %esi, %eax ; X64-NEXT: .LBB0_2: # %return -; X64-NEXT: movl %edx, %eax ; X64-NEXT: retq entry: %cmp = icmp eq i32 %x, %a Index: test/CodeGen/X86/pr15981.ll =================================================================== --- test/CodeGen/X86/pr15981.ll +++ test/CodeGen/X86/pr15981.ll @@ -19,9 +19,9 @@ ; ; X64-LABEL: fn1: ; X64: # %bb.0: -; X64-NEXT: testl %esi, %esi -; X64-NEXT: cmovel %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: testl %esi, %esi +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %3 = icmp ne i32 %1, 0 %4 = select i1 %3, i32 %0, i32 0 Index: test/CodeGen/X86/pr23664.ll =================================================================== --- test/CodeGen/X86/pr23664.ll +++ test/CodeGen/X86/pr23664.ll @@ -7,8 +7,9 @@ ret i2 %or ; CHECK-LABEL: f: -; CHECK: addb %dil, %dil -; CHECK-NEXT: orb $1, %dil -; CHECK-NEXT: movl %edi, %eax +; CHECK: movl %edi, %eax +; CHECK-NEXT: addb %al, %al +; CHECK-NEXT: orb $1, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq } Index: test/CodeGen/X86/pr28173.ll =================================================================== --- test/CodeGen/X86/pr28173.ll +++ test/CodeGen/X86/pr28173.ll @@ -78,8 +78,9 @@ define i8 @foo8(i1 zeroext %i) #0 { ; CHECK-LABEL: foo8: ; CHECK: # %bb.0: -; CHECK-NEXT: orb $-2, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $-2, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq br label %bb Index: test/CodeGen/X86/pr34657.ll =================================================================== --- test/CodeGen/X86/pr34657.ll +++ test/CodeGen/X86/pr34657.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s +; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s define <112 x i8> @pr34657() local_unnamed_addr { -; CHECK-LABEL: pr34657 +; CHECK-LABEL: pr34657: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: vmovups (%rax), %xmm0 ; CHECK-NEXT: vmovups (%rax), %ymm1 ; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; CHECK-NEXT: vmovups (%rax), %zmm2 -; CHECK-NEXT: vmovaps %ymm1, 64(%rdi) -; CHECK-NEXT: vmovaps %zmm2, (%rdi) -; CHECK-NEXT: vextractf32x4 $2, %zmm0, 96(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: vmovaps %ymm1, 64(%rax) +; CHECK-NEXT: vmovaps %zmm2, (%rax) +; CHECK-NEXT: vextractf32x4 $2, %zmm0, 96(%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: Index: test/CodeGen/X86/ptest.ll =================================================================== --- test/CodeGen/X86/ptest.ll +++ test/CodeGen/X86/ptest.ll @@ -233,16 +233,16 @@ define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel128: ; SSE41: # %bb.0: -; SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, %edi ; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: ptest %xmm0, %xmm0 +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel128: ; AVX: # %bb.0: -; AVX-NEXT: vptest %xmm0, %xmm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %xmm0, %xmm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: retq %t0 = bitcast <4 x i32> %input to i128 %t1 = icmp ne i128 %t0, 0 @@ -253,17 +253,17 @@ define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel256: ; SSE41: # %bb.0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, %edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel256: ; AVX: # %bb.0: -; AVX-NEXT: vptest %ymm0, %ymm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %ymm0, %ymm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq %t0 = bitcast <8 x i32> %input to i256 @@ -275,45 +275,45 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel512: ; SSE41: # %bb.0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm3, %xmm1 ; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: ptest %xmm1, %xmm1 -; SSE41-NEXT: cmovel %esi, %edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX1-LABEL: vecsel512: ; AVX1: # %bb.0: +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vptest %ymm0, %ymm0 -; AVX1-NEXT: cmovel %esi, %edi -; AVX1-NEXT: movl %edi, %eax +; AVX1-NEXT: cmovel %esi, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX512-LABEL: vecsel512: ; AVX512: # %bb.0: +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512-NEXT: vmovq %xmm1, %rax +; AVX512-NEXT: vmovq %xmm1, %rcx ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512-NEXT: vmovq %xmm2, %rcx -; AVX512-NEXT: orq %rax, %rcx +; AVX512-NEXT: vmovq %xmm2, %rdx +; AVX512-NEXT: orq %rcx, %rdx ; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512-NEXT: vmovq %xmm3, %rax -; AVX512-NEXT: orq %rcx, %rax -; AVX512-NEXT: vmovq %xmm0, %rcx -; AVX512-NEXT: orq %rax, %rcx -; AVX512-NEXT: vpextrq $1, %xmm1, %rax -; AVX512-NEXT: vpextrq $1, %xmm2, %rdx -; AVX512-NEXT: orq %rax, %rdx -; AVX512-NEXT: vpextrq $1, %xmm3, %rax -; AVX512-NEXT: orq %rdx, %rax -; AVX512-NEXT: vpextrq $1, %xmm0, %rdx -; AVX512-NEXT: orq %rax, %rdx +; AVX512-NEXT: vmovq %xmm3, %rcx +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: vmovq %xmm0, %rdx ; AVX512-NEXT: orq %rcx, %rdx -; AVX512-NEXT: cmovel %esi, %edi -; AVX512-NEXT: movl %edi, %eax +; AVX512-NEXT: vpextrq $1, %xmm1, %rcx +; AVX512-NEXT: vpextrq $1, %xmm2, %rdi +; AVX512-NEXT: orq %rcx, %rdi +; AVX512-NEXT: vpextrq $1, %xmm3, %rcx +; AVX512-NEXT: orq %rdi, %rcx +; AVX512-NEXT: vpextrq $1, %xmm0, %rdi +; AVX512-NEXT: orq %rcx, %rdi +; AVX512-NEXT: orq %rdx, %rdi +; AVX512-NEXT: cmovel %esi, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %t0 = bitcast <16 x i32> %input to i512 Index: test/CodeGen/X86/rot16.ll =================================================================== --- test/CodeGen/X86/rot16.ll +++ test/CodeGen/X86/rot16.ll @@ -13,8 +13,10 @@ ; X64-LABEL: foo: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldw %cl, %ax, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = shl i16 %x, %z %t1 = sub i16 16, %z @@ -35,8 +37,10 @@ ; X64-LABEL: bar: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldw %cl, %di, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = shl i16 %y, %z %t1 = sub i16 16, %z @@ -56,8 +60,10 @@ ; X64-LABEL: un: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdw %cl, %ax, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %x, %z %t1 = sub i16 16, %z @@ -78,8 +84,10 @@ ; X64-LABEL: bu: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdw %cl, %di, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %y, %z %t1 = sub i16 16, %z @@ -97,8 +105,9 @@ ; ; X64-LABEL: xfoo: ; X64: # %bb.0: -; X64-NEXT: rolw $5, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $5, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %x, 11 %t1 = shl i16 %x, 5 @@ -116,8 +125,9 @@ ; ; X64-LABEL: xbar: ; X64: # %bb.0: -; X64-NEXT: shldw $5, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: shldw $5, %di, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = shl i16 %y, 5 %t1 = lshr i16 %x, 11 @@ -134,8 +144,9 @@ ; ; X64-LABEL: xun: ; X64: # %bb.0: -; X64-NEXT: rolw $11, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $11, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %x, 5 %t1 = shl i16 %x, 11 @@ -153,8 +164,9 @@ ; ; X64-LABEL: xbu: ; X64: # %bb.0: -; X64-NEXT: shldw $11, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shldw $11, %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %y, 5 %t1 = shl i16 %x, 11 Index: test/CodeGen/X86/rot64.ll =================================================================== --- test/CodeGen/X86/rot64.ll +++ test/CodeGen/X86/rot64.ll @@ -6,9 +6,10 @@ define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: foo: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rolq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: rolq %cl, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %x, %z @@ -21,9 +22,10 @@ define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bar: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shldq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: shldq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, %z @@ -36,9 +38,10 @@ define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: un: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rorq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: rorq %cl, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %x, %z @@ -51,9 +54,10 @@ define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bu: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shrdq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: shrdq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, %z @@ -66,14 +70,14 @@ define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xfoo: ; X64: # %bb.0: # %entry -; X64-NEXT: rolq $7, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $7, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xfoo: ; SHLD: # %bb.0: # %entry -; SHLD-NEXT: shldq $7, %rdi, %rdi ; SHLD-NEXT: movq %rdi, %rax +; SHLD-NEXT: shldq $7, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xfoo: @@ -115,8 +119,8 @@ define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: xbar: ; ALL: # %bb.0: # %entry -; ALL-NEXT: shrdq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shrdq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, 7 @@ -128,14 +132,14 @@ define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xun: ; X64: # %bb.0: # %entry -; X64-NEXT: rolq $57, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $57, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xun: ; SHLD: # %bb.0: # %entry -; SHLD-NEXT: shldq $57, %rdi, %rdi ; SHLD-NEXT: movq %rdi, %rax +; SHLD-NEXT: shldq $57, %rax, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xun: @@ -177,8 +181,8 @@ define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: xbu: ; ALL: # %bb.0: # %entry -; ALL-NEXT: shldq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shldq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, 7 Index: test/CodeGen/X86/rotate.ll =================================================================== --- test/CodeGen/X86/rotate.ll +++ test/CodeGen/X86/rotate.ll @@ -43,8 +43,9 @@ ; 64-LABEL: rotl64: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolq %cl, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: rolq %cl, %rax ; 64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i64 %B = shl i64 %A, %shift.upgrd.1 @@ -96,8 +97,9 @@ ; 64-LABEL: rotr64: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorq %cl, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: rorq %cl, %rax ; 64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i64 %B = lshr i64 %A, %shift.upgrd.3 @@ -120,8 +122,8 @@ ; ; 64-LABEL: rotli64: ; 64: # %bb.0: -; 64-NEXT: rolq $5, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq $5, %rax ; 64-NEXT: retq %B = shl i64 %A, 5 %C = lshr i64 %A, 59 @@ -141,8 +143,8 @@ ; ; 64-LABEL: rotri64: ; 64: # %bb.0: -; 64-NEXT: rolq $59, %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq $59, %rax ; 64-NEXT: retq %B = lshr i64 %A, 5 %C = shl i64 %A, 59 @@ -162,8 +164,8 @@ ; ; 64-LABEL: rotl1_64: ; 64: # %bb.0: -; 64-NEXT: rolq %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rolq %rax ; 64-NEXT: retq %B = shl i64 %A, 1 %C = lshr i64 %A, 63 @@ -183,8 +185,8 @@ ; ; 64-LABEL: rotr1_64: ; 64: # %bb.0: -; 64-NEXT: rorq %rdi ; 64-NEXT: movq %rdi, %rax +; 64-NEXT: rorq %rax ; 64-NEXT: retq %B = shl i64 %A, 63 %C = lshr i64 %A, 1 @@ -203,8 +205,9 @@ ; 64-LABEL: rotl32: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: roll %cl, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: roll %cl, %eax ; 64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i32 %B = shl i32 %A, %shift.upgrd.1 @@ -226,8 +229,9 @@ ; 64-LABEL: rotr32: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorl %cl, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: rorl %cl, %eax ; 64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i32 %B = lshr i32 %A, %shift.upgrd.3 @@ -247,8 +251,8 @@ ; ; 64-LABEL: rotli32: ; 64: # %bb.0: -; 64-NEXT: roll $5, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll $5, %eax ; 64-NEXT: retq %B = shl i32 %A, 5 %C = lshr i32 %A, 27 @@ -265,8 +269,8 @@ ; ; 64-LABEL: rotri32: ; 64: # %bb.0: -; 64-NEXT: roll $27, %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll $27, %eax ; 64-NEXT: retq %B = lshr i32 %A, 5 %C = shl i32 %A, 27 @@ -283,8 +287,8 @@ ; ; 64-LABEL: rotl1_32: ; 64: # %bb.0: -; 64-NEXT: roll %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: roll %eax ; 64-NEXT: retq %B = shl i32 %A, 1 %C = lshr i32 %A, 31 @@ -301,8 +305,8 @@ ; ; 64-LABEL: rotr1_32: ; 64: # %bb.0: -; 64-NEXT: rorl %edi ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorl %eax ; 64-NEXT: retq %B = shl i32 %A, 31 %C = lshr i32 %A, 1 @@ -321,8 +325,10 @@ ; 64-LABEL: rotl16: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolw %cl, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: rolw %cl, %ax +; 64-NEXT: # kill: def $ax killed $ax killed $eax ; 64-NEXT: retq %shift.upgrd.5 = zext i8 %Amt to i16 %B = shl i16 %A, %shift.upgrd.5 @@ -344,8 +350,10 @@ ; 64-LABEL: rotr16: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorw %cl, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: rorw %cl, %ax +; 64-NEXT: # kill: def $ax killed $ax killed $eax ; 64-NEXT: retq %shift.upgrd.7 = zext i8 %Amt to i16 %B = lshr i16 %A, %shift.upgrd.7 @@ -365,8 +373,9 @@ ; ; 64-LABEL: rotli16: ; 64: # %bb.0: -; 64-NEXT: rolw $5, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw $5, %ax +; 64-NEXT: # kill: def $ax killed $ax killed $eax ; 64-NEXT: retq %B = shl i16 %A, 5 %C = lshr i16 %A, 11 @@ -383,8 +392,9 @@ ; ; 64-LABEL: rotri16: ; 64: # %bb.0: -; 64-NEXT: rolw $11, %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw $11, %ax +; 64-NEXT: # kill: def $ax killed $ax killed $eax ; 64-NEXT: retq %B = lshr i16 %A, 5 %C = shl i16 %A, 11 @@ -401,8 +411,9 @@ ; ; 64-LABEL: rotl1_16: ; 64: # %bb.0: -; 64-NEXT: rolw %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolw %ax +; 64-NEXT: # kill: def $ax killed $ax killed $eax ; 64-NEXT: retq %B = shl i16 %A, 1 %C = lshr i16 %A, 15 @@ -419,8 +430,9 @@ ; ; 64-LABEL: rotr1_16: ; 64: # %bb.0: -; 64-NEXT: rorw %di ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorw %ax +; 64-NEXT: # kill: def $ax killed $ax killed $eax ; 64-NEXT: retq %B = lshr i16 %A, 1 %C = shl i16 %A, 15 @@ -439,8 +451,10 @@ ; 64-LABEL: rotl8: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rolb %cl, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: rolb %cl, %al +; 64-NEXT: # kill: def $al killed $al killed $eax ; 64-NEXT: retq %B = shl i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -460,8 +474,10 @@ ; 64-LABEL: rotr8: ; 64: # %bb.0: ; 64-NEXT: movl %esi, %ecx -; 64-NEXT: rorb %cl, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: # kill: def $cl killed $cl killed $ecx +; 64-NEXT: rorb %cl, %al +; 64-NEXT: # kill: def $al killed $al killed $eax ; 64-NEXT: retq %B = lshr i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -479,8 +495,9 @@ ; ; 64-LABEL: rotli8: ; 64: # %bb.0: -; 64-NEXT: rolb $5, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb $5, %al +; 64-NEXT: # kill: def $al killed $al killed $eax ; 64-NEXT: retq %B = shl i8 %A, 5 %C = lshr i8 %A, 3 @@ -497,8 +514,9 @@ ; ; 64-LABEL: rotri8: ; 64: # %bb.0: -; 64-NEXT: rolb $3, %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb $3, %al +; 64-NEXT: # kill: def $al killed $al killed $eax ; 64-NEXT: retq %B = lshr i8 %A, 5 %C = shl i8 %A, 3 @@ -515,8 +533,9 @@ ; ; 64-LABEL: rotl1_8: ; 64: # %bb.0: -; 64-NEXT: rolb %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rolb %al +; 64-NEXT: # kill: def $al killed $al killed $eax ; 64-NEXT: retq %B = shl i8 %A, 1 %C = lshr i8 %A, 7 @@ -533,8 +552,9 @@ ; ; 64-LABEL: rotr1_8: ; 64: # %bb.0: -; 64-NEXT: rorb %dil ; 64-NEXT: movl %edi, %eax +; 64-NEXT: rorb %al +; 64-NEXT: # kill: def $al killed $al killed $eax ; 64-NEXT: retq %B = lshr i8 %A, 1 %C = shl i8 %A, 7 @@ -665,6 +685,7 @@ ; 64-LABEL: truncated_rot: ; 64: # %bb.0: # %entry ; 64-NEXT: movl %esi, %ecx +; 64-NEXT: # kill: def $cl killed $cl killed $ecx ; 64-NEXT: rolq %cl, %rdi ; 64-NEXT: movl %edi, %eax ; 64-NEXT: retq Index: test/CodeGen/X86/rotate4.ll =================================================================== --- test/CodeGen/X86/rotate4.ll +++ test/CodeGen/X86/rotate4.ll @@ -8,8 +8,9 @@ ; CHECK-LABEL: rotate_left_32: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: roll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: roll %cl, %eax ; CHECK-NEXT: retq %and = and i32 %b, 31 %shl = shl i32 %a, %and @@ -24,8 +25,9 @@ ; CHECK-LABEL: rotate_right_32: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: rorl %cl, %eax ; CHECK-NEXT: retq %and = and i32 %b, 31 %shl = lshr i32 %a, %and @@ -39,9 +41,10 @@ define i64 @rotate_left_64(i64 %a, i64 %b) { ; CHECK-LABEL: rotate_left_64: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolq %cl, %rdi +; CHECK-NEXT: movq %rsi, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: rolq %cl, %rax ; CHECK-NEXT: retq %and = and i64 %b, 63 %shl = shl i64 %a, %and @@ -55,9 +58,10 @@ define i64 @rotate_right_64(i64 %a, i64 %b) { ; CHECK-LABEL: rotate_right_64: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorq %cl, %rdi +; CHECK-NEXT: movq %rsi, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: rorq %cl, %rax ; CHECK-NEXT: retq %and = and i64 %b, 63 %shl = lshr i64 %a, %and @@ -74,6 +78,7 @@ ; CHECK-LABEL: rotate_left_m32: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: roll %cl, (%rdi) ; CHECK-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -91,6 +96,7 @@ ; CHECK-LABEL: rotate_right_m32: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: rorl %cl, (%rdi) ; CHECK-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -107,7 +113,8 @@ define void @rotate_left_m64(i64 *%pa, i64 %b) { ; CHECK-LABEL: rotate_left_m64: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: rolq %cl, (%rdi) ; CHECK-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -124,7 +131,8 @@ define void @rotate_right_m64(i64 *%pa, i64 %b) { ; CHECK-LABEL: rotate_right_m64: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: rorq %cl, (%rdi) ; CHECK-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -145,8 +153,10 @@ ; CHECK-LABEL: rotate_left_8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: rolb %cl, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -162,8 +172,10 @@ ; CHECK-LABEL: rotate_right_8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: rorb %cl, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -179,8 +191,10 @@ ; CHECK-LABEL: rotate_left_16: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rolw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: rolw %cl, %ax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -196,8 +210,10 @@ ; CHECK-LABEL: rotate_right_16: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: rorw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: rorw %cl, %ax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -213,6 +229,7 @@ ; CHECK-LABEL: rotate_left_m8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: rolb %cl, (%rdi) ; CHECK-NEXT: retq %x = load i8, i8* %p, align 1 @@ -231,6 +248,7 @@ ; CHECK-LABEL: rotate_right_m8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: rorb %cl, (%rdi) ; CHECK-NEXT: retq %x = load i8, i8* %p, align 1 @@ -249,6 +267,7 @@ ; CHECK-LABEL: rotate_left_m16: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: rolw %cl, (%rdi) ; CHECK-NEXT: retq %x = load i16, i16* %p, align 1 @@ -267,6 +286,7 @@ ; CHECK-LABEL: rotate_right_m16: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: rorw %cl, (%rdi) ; CHECK-NEXT: retq %x = load i16, i16* %p, align 1 Index: test/CodeGen/X86/sar_fold64.ll =================================================================== --- test/CodeGen/X86/sar_fold64.ll +++ test/CodeGen/X86/sar_fold64.ll @@ -56,9 +56,10 @@ define i8 @all_sign_bit_ashr(i8 %x) { ; CHECK-LABEL: all_sign_bit_ashr: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: negb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %and = and i8 %x, 1 %neg = sub i8 0, %and Index: test/CodeGen/X86/scalar_widen_div.ll =================================================================== --- test/CodeGen/X86/scalar_widen_div.ll +++ test/CodeGen/X86/scalar_widen_div.ll @@ -57,20 +57,21 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { ; CHECK-LABEL: test_char_div: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %edx, %r10d ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %cl ; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %r8b -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %r10d, %eax ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %r9b ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: retq %div.r = sdiv <3 x i8> %num, %div ret <3 x i8> %div.r @@ -233,8 +234,8 @@ ; CHECK-LABEL: test_ulong_div: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %r10 -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq %rcx ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: xorl %edx, %edx Index: test/CodeGen/X86/schedule-x86-64-shld.ll =================================================================== --- test/CodeGen/X86/schedule-x86-64-shld.ll +++ test/CodeGen/X86/schedule-x86-64-shld.ll @@ -12,20 +12,20 @@ define i64 @lshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize { ; GENERIC-LABEL: lshift10_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shldq $10, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift10_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: shldq $10, %rsi, %rdi # sched: [3:3.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift10_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: shldq $10, %rsi, %rdi ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: shldq $10, %rsi, %rax ; BDVER1-NEXT: retq entry: %shl = shl i64 %a, 10 @@ -37,8 +37,8 @@ define i64 @lshift10(i64 %a, i64 %b) nounwind readnone { ; GENERIC-LABEL: lshift10: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shldq $10, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift10: @@ -70,20 +70,20 @@ define i64 @rshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize { ; GENERIC-LABEL: rshift10_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shrdq $62, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift10_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: shrdq $62, %rsi, %rdi # sched: [3:3.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: rshift10_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: shrdq $62, %rsi, %rdi ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: shrdq $62, %rsi, %rax ; BDVER1-NEXT: retq entry: %shl = lshr i64 %a, 62 @@ -96,8 +96,8 @@ define i64 @rshift10(i64 %a, i64 %b) nounwind readnone { ; GENERIC-LABEL: rshift10: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shrdq $62, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift10: @@ -126,23 +126,26 @@ define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize { ; GENERIC-LABEL: lshift_cl_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_cl_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:4.00] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx +; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_cl_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shldq %cl, %rsi, %rdi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx +; BDVER1-NEXT: shldq %cl, %rsi, %rax ; BDVER1-NEXT: retq entry: %shl = shl i64 %a, %c @@ -155,33 +158,34 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone { ; GENERIC-LABEL: lshift_cl: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx -; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] +; BTVER2-NEXT: movl $64, %edx # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] +; BTVER2-NEXT: subl %ecx, %edx # sched: [1:0.50] +; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] +; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50] +; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shlq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx -; BDVER1-NEXT: shrq %cl, %rsi -; BDVER1-NEXT: orq %rdi, %rsi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rsi, %rax +; BDVER1-NEXT: shlq %cl, %rdi +; BDVER1-NEXT: movl $64, %edx +; BDVER1-NEXT: subl %ecx, %edx +; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: shrq %cl, %rax +; BDVER1-NEXT: orq %rdi, %rax ; BDVER1-NEXT: retq entry: %shl = shl i64 %a, %c @@ -200,23 +204,26 @@ define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize { ; GENERIC-LABEL: rshift_cl_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift_cl_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:4.00] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx +; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: rshift_cl_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shrdq %cl, %rsi, %rdi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx +; BDVER1-NEXT: shrdq %cl, %rsi, %rax ; BDVER1-NEXT: retq entry: %shr = lshr i64 %a, %c @@ -229,33 +236,34 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone { ; GENERIC-LABEL: rshift_cl: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx -; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] +; BTVER2-NEXT: movl $64, %edx # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] +; BTVER2-NEXT: subl %ecx, %edx # sched: [1:0.50] +; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] +; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] +; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: rshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shrq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx -; BDVER1-NEXT: shlq %cl, %rsi -; BDVER1-NEXT: orq %rdi, %rsi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rsi, %rax +; BDVER1-NEXT: shrq %cl, %rdi +; BDVER1-NEXT: movl $64, %edx +; BDVER1-NEXT: subl %ecx, %edx +; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: shlq %cl, %rax +; BDVER1-NEXT: orq %rdi, %rax ; BDVER1-NEXT: retq entry: %shr = lshr i64 %a, %c @@ -275,19 +283,22 @@ define void @lshift_mem_cl_optsize(i64 %a, i64 %c) nounwind readnone optsize { ; GENERIC-LABEL: lshift_mem_cl_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx ; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_mem_cl_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_mem_cl_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %esi, %ecx +; BDVER1-NEXT: movq %rsi, %rcx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shldq %cl, %rdi, {{.*}}(%rip) ; BDVER1-NEXT: retq entry: @@ -303,33 +314,34 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone { ; GENERIC-LABEL: lshift_mem_cl: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx ; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_mem_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00] -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: movq {{.*}}(%rip), %rdx # sched: [5:1.00] +; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] +; BTVER2-NEXT: movl $64, %eax # sched: [1:0.50] +; BTVER2-NEXT: subl %ecx, %eax # sched: [1:0.50] +; BTVER2-NEXT: shlq %cl, %rdx # sched: [1:0.50] +; BTVER2-NEXT: movl %eax, %ecx # sched: [1:0.50] ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50] +; BTVER2-NEXT: orq %rdx, %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_mem_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movq {{.*}}(%rip), %rax -; BDVER1-NEXT: movl %esi, %ecx -; BDVER1-NEXT: shlq %cl, %rax -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %esi, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: movq %rsi, %rcx +; BDVER1-NEXT: movq {{.*}}(%rip), %rdx +; BDVER1-NEXT: shlq %cl, %rdx +; BDVER1-NEXT: movl $64, %eax +; BDVER1-NEXT: subl %ecx, %eax +; BDVER1-NEXT: movl %eax, %ecx ; BDVER1-NEXT: shrq %cl, %rdi -; BDVER1-NEXT: orq %rax, %rdi +; BDVER1-NEXT: orq %rdx, %rdi ; BDVER1-NEXT: movq %rdi, {{.*}}(%rip) ; BDVER1-NEXT: retq entry: Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -2541,62 +2541,62 @@ define i32 @test_bswap32(i32 %a0) optsize { ; GENERIC-LABEL: test_bswap32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bswapl %edi # sched: [2:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: bswapl %eax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_bswap32: ; ATOM: # %bb.0: -; ATOM-NEXT: bswapl %edi # sched: [1:1.00] ; ATOM-NEXT: movl %edi, %eax # sched: [1:0.50] +; ATOM-NEXT: bswapl %eax # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_bswap32: ; SLM: # %bb.0: -; SLM-NEXT: bswapl %edi # sched: [1:0.50] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: bswapl %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_bswap32: ; SANDY: # %bb.0: -; SANDY-NEXT: bswapl %edi # sched: [2:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: bswapl %eax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bswap32: ; HASWELL: # %bb.0: -; HASWELL-NEXT: bswapl %edi # sched: [2:0.50] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: bswapl %eax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_bswap32: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bswapl %edi # sched: [2:0.50] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: bswapl %eax # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bswap32: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bswapl %edi # sched: [2:0.50] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: bswapl %eax # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_bswap32: ; SKX: # %bb.0: -; SKX-NEXT: bswapl %edi # sched: [2:0.50] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: bswapl %eax # sched: [2:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bswap32: ; BTVER2: # %bb.0: -; BTVER2-NEXT: bswapl %edi # sched: [1:0.50] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: bswapl %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_bswap32: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bswapl %edi # sched: [1:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: bswapl %eax # sched: [1:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind ret i32 %1 @@ -2604,62 +2604,62 @@ define i64 @test_bswap64(i64 %a0) optsize { ; GENERIC-LABEL: test_bswap64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bswapq %rdi # sched: [2:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: bswapq %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_bswap64: ; ATOM: # %bb.0: -; ATOM-NEXT: bswapq %rdi # sched: [1:1.00] ; ATOM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; ATOM-NEXT: bswapq %rax # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_bswap64: ; SLM: # %bb.0: -; SLM-NEXT: bswapq %rdi # sched: [1:0.50] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: bswapq %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_bswap64: ; SANDY: # %bb.0: -; SANDY-NEXT: bswapq %rdi # sched: [2:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: bswapq %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bswap64: ; HASWELL: # %bb.0: -; HASWELL-NEXT: bswapq %rdi # sched: [2:0.50] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: bswapq %rax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_bswap64: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bswapq %rdi # sched: [2:0.50] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: bswapq %rax # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bswap64: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bswapq %rdi # sched: [2:0.50] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: bswapq %rax # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_bswap64: ; SKX: # %bb.0: -; SKX-NEXT: bswapq %rdi # sched: [2:0.50] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: bswapq %rax # sched: [2:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bswap64: ; BTVER2: # %bb.0: -; BTVER2-NEXT: bswapq %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: bswapq %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_bswap64: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bswapq %rdi # sched: [1:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: bswapq %rax # sched: [1:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = tail call i64 asm "bswap $0", "=r,0"(i64 %a0) nounwind ret i64 %1 Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -622,23 +622,14 @@ declare noalias i8* @_Znam(i64) noredzone define noalias i8* @test12(i64 %count) nounwind ssp noredzone { -; GENERIC-LABEL: test12: -; GENERIC: ## %bb.0: ## %entry -; GENERIC-NEXT: movl $4, %ecx -; GENERIC-NEXT: movq %rdi, %rax -; GENERIC-NEXT: mulq %rcx -; GENERIC-NEXT: movq $-1, %rdi -; GENERIC-NEXT: cmovnoq %rax, %rdi -; GENERIC-NEXT: jmp __Znam ## TAILCALL -; -; ATOM-LABEL: test12: -; ATOM: ## %bb.0: ## %entry -; ATOM-NEXT: movq %rdi, %rax -; ATOM-NEXT: movl $4, %ecx -; ATOM-NEXT: mulq %rcx -; ATOM-NEXT: movq $-1, %rdi -; ATOM-NEXT: cmovnoq %rax, %rdi -; ATOM-NEXT: jmp __Znam ## TAILCALL +; CHECK-LABEL: test12: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: mulq %rcx +; CHECK-NEXT: movq $-1, %rdi +; CHECK-NEXT: cmovnoq %rax, %rdi +; CHECK-NEXT: jmp __Znam ## TAILCALL ; ; MCU-LABEL: test12: ; MCU: # %bb.0: # %entry @@ -655,7 +646,7 @@ ; MCU-NEXT: movl %edi, %edx ; MCU-NEXT: pushl $0 ; MCU-NEXT: pushl $4 -; MCU-NEXT: calll __udivdi3 +; MCU-NEXT: calll __udivdi3@PLT ; MCU-NEXT: addl $8, %esp ; MCU-NEXT: xorl %ebx, %edx ; MCU-NEXT: xorl %ebp, %eax @@ -827,16 +818,18 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; GENERIC-LABEL: test18: ; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpl $15, %edi -; GENERIC-NEXT: cmovgel %edx, %esi ; GENERIC-NEXT: movl %esi, %eax +; GENERIC-NEXT: cmpl $15, %edi +; GENERIC-NEXT: cmovgel %edx, %eax +; GENERIC-NEXT: ## kill: def $al killed $al killed $eax ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test18: ; ATOM: ## %bb.0: -; ATOM-NEXT: cmpl $15, %edi -; ATOM-NEXT: cmovgel %edx, %esi ; ATOM-NEXT: movl %esi, %eax +; ATOM-NEXT: cmpl $15, %edi +; ATOM-NEXT: cmovgel %edx, %eax +; ATOM-NEXT: ## kill: def $al killed $al killed $eax ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq @@ -858,16 +851,18 @@ define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) { ; CHECK-LABEL: trunc_select_miscompile: ; CHECK: ## %bb.0: -; CHECK-NEXT: orb $2, %sil ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $2, %cl +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: trunc_select_miscompile: ; MCU: # %bb.0: -; MCU-NEXT: orb $2, %dl ; MCU-NEXT: movl %edx, %ecx +; MCU-NEXT: orb $2, %cl +; MCU-NEXT: # kill: def $cl killed $cl killed $ecx ; MCU-NEXT: shll %cl, %eax ; MCU-NEXT: retl %tmp1 = select i1 %cc, i32 3, i32 2 @@ -1086,10 +1081,10 @@ define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_xor_2: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: xorl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: xorl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_2: @@ -1110,10 +1105,10 @@ define i32 @select_or(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_or: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or: @@ -1134,10 +1129,10 @@ define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) { ; CHECK-LABEL: select_or_1: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or_1: Index: test/CodeGen/X86/select_const.ll =================================================================== --- test/CodeGen/X86/select_const.ll +++ test/CodeGen/X86/select_const.ll @@ -43,8 +43,8 @@ define i32 @select_1_or_0(i1 %cond) { ; CHECK-LABEL: select_1_or_0: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -62,8 +62,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_1_or_0_signext: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -95,8 +95,8 @@ define i32 @select_0_or_neg1_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_neg1_signext: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -107,9 +107,9 @@ define i32 @select_neg1_or_0(i1 %cond) { ; CHECK-LABEL: select_neg1_or_0: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -118,8 +118,8 @@ define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_neg1_or_0_zeroext: ; CHECK: # %bb.0: -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -329,9 +329,10 @@ define i8 @select_pow2_diff(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff: ; CHECK: # %bb.0: -; CHECK-NEXT: shlb $4, %dil -; CHECK-NEXT: orb $3, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: orb $3, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %sel = select i1 %cond, i8 19, i8 3 ret i8 %sel Index: test/CodeGen/X86/setcc-logic.ll =================================================================== --- test/CodeGen/X86/setcc-logic.ll +++ test/CodeGen/X86/setcc-logic.ll @@ -41,9 +41,10 @@ define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_set: ; CHECK: # %bb.0: -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 @@ -66,9 +67,10 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_set: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 Index: test/CodeGen/X86/sext-i1.ll =================================================================== --- test/CodeGen/X86/sext-i1.ll +++ test/CodeGen/X86/sext-i1.ll @@ -165,8 +165,8 @@ ; ; X64-LABEL: select_0_or_1s_signext: ; X64: # %bb.0: -; X64-NEXT: notl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: notl %eax ; X64-NEXT: retq %not = xor i1 %cond, 1 %sext = sext i1 %not to i32 Index: test/CodeGen/X86/shift-and.ll =================================================================== --- test/CodeGen/X86/shift-and.ll +++ test/CodeGen/X86/shift-and.ll @@ -12,9 +12,10 @@ ; ; X64-LABEL: t1: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %shamt = and i32 %t, 31 %res = shl i32 %val, %shamt @@ -31,9 +32,10 @@ ; ; X64-LABEL: t2: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %shamt = and i32 %t, 63 %res = shl i32 %val, %shamt @@ -52,6 +54,7 @@ ; X64-LABEL: t3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: sarw %cl, {{.*}}(%rip) ; X64-NEXT: retq %shamt = and i16 %t, 31 @@ -82,9 +85,10 @@ ; ; X64-LABEL: t4: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 63 %res = lshr i64 %val, %shamt @@ -112,9 +116,10 @@ ; ; X64-LABEL: t5: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 191 %res = lshr i64 %val, %shamt @@ -147,7 +152,8 @@ ; ; X64-LABEL: t5ptr: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NEXT: shrq %cl, (%rsi) ; X64-NEXT: retq %shamt = and i64 %t, 191 @@ -205,9 +211,9 @@ ; ; X64-LABEL: big_mask_constant: ; X64: # %bb.0: -; X64-NEXT: shrq $7, %rdi -; X64-NEXT: andl $134217728, %edi # imm = 0x8000000 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $7, %rax +; X64-NEXT: andl $134217728, %eax # imm = 0x8000000 ; X64-NEXT: retq %and = and i64 %x, 17179869184 ; 0x400000000 %sh = lshr i64 %and, 7 Index: test/CodeGen/X86/shift-bmi2.ll =================================================================== --- test/CodeGen/X86/shift-bmi2.ll +++ test/CodeGen/X86/shift-bmi2.ll @@ -26,8 +26,8 @@ ; ; BMI264-LABEL: shl32i: ; BMI264: # %bb.0: -; BMI264-NEXT: shll $5, %edi ; BMI264-NEXT: movl %edi, %eax +; BMI264-NEXT: shll $5, %eax ; BMI264-NEXT: retq %shl = shl i32 %x, 5 ret i32 %shl @@ -69,6 +69,23 @@ } define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64: ; BMI264: # %bb.0: ; BMI264-NEXT: shlxq %rsi, %rdi, %rax @@ -78,16 +95,42 @@ } define i64 @shl64i(i64 %x) nounwind uwtable readnone { +; BMI2-LABEL: shl64i: +; BMI2: # %bb.0: +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64i: ; BMI264: # %bb.0: -; BMI264-NEXT: shlq $7, %rdi ; BMI264-NEXT: movq %rdi, %rax +; BMI264-NEXT: shlq $7, %rax ; BMI264-NEXT: retq %shl = shl i64 %x, 7 ret i64 %shl } define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64p: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl (%eax), %esi +; BMI2-NEXT: movl 4(%eax), %edx +; BMI2-NEXT: shldl %cl, %esi, %edx +; BMI2-NEXT: shlxl %ecx, %esi, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64p: ; BMI264: # %bb.0: ; BMI264-NEXT: shlxq %rsi, (%rdi), %rax @@ -98,6 +141,15 @@ } define i64 @shl64pi(i64* %p) nounwind uwtable readnone { +; BMI2-LABEL: shl64pi: +; BMI2: # %bb.0: +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movl (%ecx), %eax +; BMI2-NEXT: movl 4(%ecx), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64pi: ; BMI264: # %bb.0: ; BMI264-NEXT: movq (%rdi), %rax @@ -141,6 +193,23 @@ } define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64: ; BMI264: # %bb.0: ; BMI264-NEXT: shrxq %rsi, %rdi, %rax @@ -150,6 +219,24 @@ } define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64p: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64p: ; BMI264: # %bb.0: ; BMI264-NEXT: shrxq %rsi, (%rdi), %rax @@ -192,6 +279,23 @@ } define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: ashr64: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64: ; BMI264: # %bb.0: ; BMI264-NEXT: sarxq %rsi, %rdi, %rax @@ -201,6 +305,24 @@ } define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: ashr64p: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64p: ; BMI264: # %bb.0: ; BMI264-NEXT: sarxq %rsi, (%rdi), %rax @@ -227,6 +349,21 @@ } define i64 @shl64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: shl64and: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64and: ; BMI264: # %bb.0: ; BMI264-NEXT: shlxq %rdi, %rsi, %rax @@ -253,6 +390,21 @@ } define i64 @lshr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: lshr64and: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64and: ; BMI264: # %bb.0: ; BMI264-NEXT: shrxq %rdi, %rsi, %rax @@ -279,6 +431,21 @@ } define i64 @ashr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: ashr64and: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64and: ; BMI264: # %bb.0: ; BMI264-NEXT: sarxq %rdi, %rsi, %rax Index: test/CodeGen/X86/shift-double-x86_64.ll =================================================================== --- test/CodeGen/X86/shift-double-x86_64.ll +++ test/CodeGen/X86/shift-double-x86_64.ll @@ -6,10 +6,11 @@ define i64 @test1(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -22,10 +23,11 @@ define i64 @test2(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -38,9 +40,10 @@ define i64 @test3(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = lshr i64 %lo, %bits64 @@ -52,9 +55,10 @@ define i64 @test4(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = shl i64 %hi, %bits64 @@ -66,9 +70,10 @@ define i64 @test5(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = lshr i64 %lo, 1 @@ -81,9 +86,10 @@ define i64 @test6(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = shl i64 %lo, 1 @@ -96,9 +102,10 @@ define i64 @test7(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = add i64 %lo, %lo Index: test/CodeGen/X86/shift-double.ll =================================================================== --- test/CodeGen/X86/shift-double.ll +++ test/CodeGen/X86/shift-double.ll @@ -26,8 +26,9 @@ ; X64-LABEL: test1: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %shift.upgrd.1 = zext i8 %C to i64 ; [#uses=1] %Y = shl i64 %X, %shift.upgrd.1 ; [#uses=1] @@ -57,8 +58,9 @@ ; X64-LABEL: test2: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %shift.upgrd.2 = zext i8 %C to i64 ; [#uses=1] %Y = ashr i64 %X, %shift.upgrd.2 ; [#uses=1] @@ -87,8 +89,9 @@ ; X64-LABEL: test3: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shift.upgrd.3 = zext i8 %C to i64 ; [#uses=1] %Y = lshr i64 %X, %shift.upgrd.3 ; [#uses=1] @@ -109,8 +112,9 @@ ; X64-LABEL: test4: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %shift.upgrd.4 = zext i8 %C to i32 ; [#uses=1] %X = shl i32 %A, %shift.upgrd.4 ; [#uses=1] @@ -133,8 +137,10 @@ ; X64-LABEL: test5: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldw %cl, %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %shift.upgrd.6 = zext i8 %C to i16 ; [#uses=1] %X = shl i16 %A, %shift.upgrd.6 ; [#uses=1] @@ -159,8 +165,9 @@ ; X64-LABEL: test6: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %shift.upgrd.4 = zext i8 %C to i32 ; [#uses=1] %X = lshr i32 %A, %shift.upgrd.4 ; [#uses=1] @@ -183,8 +190,10 @@ ; X64-LABEL: test7: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdw %cl, %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %shift.upgrd.6 = zext i8 %C to i16 ; [#uses=1] %X = lshr i16 %A, %shift.upgrd.6 ; [#uses=1] @@ -212,10 +221,11 @@ ; ; X64-LABEL: test8: ; X64: # %bb.0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -235,10 +245,11 @@ ; ; X64-LABEL: test9: ; X64: # %bb.0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -258,10 +269,11 @@ ; ; X64-LABEL: test10: ; X64: # %bb.0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -284,10 +296,11 @@ ; ; X64-LABEL: test11: ; X64: # %bb.0: -; X64-NEXT: andl $31, %edx ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $31, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %and = and i32 %bits, 31 %and32 = sub i32 32, %and @@ -310,10 +323,11 @@ ; ; X64-LABEL: test12: ; X64: # %bb.0: -; X64-NEXT: andl $31, %edx ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: andl $31, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %edi, %eax ; X64-NEXT: retq %and = and i32 %bits, 31 %and32 = sub i32 32, %and @@ -335,8 +349,9 @@ ; X64-LABEL: test13: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = sub i32 32, %bits %sh_lo = lshr i32 %lo, %bits32 @@ -357,8 +372,9 @@ ; X64-LABEL: test14: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %edi, %eax ; X64-NEXT: retq %bits32 = sub i32 32, %bits %sh_lo = shl i32 %hi, %bits32 @@ -379,8 +395,9 @@ ; X64-LABEL: test15: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = lshr i32 %lo, 1 @@ -402,8 +419,9 @@ ; X64-LABEL: test16: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = shl i32 %lo, 1 @@ -425,8 +443,9 @@ ; X64-LABEL: test17: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = add i32 %lo, %lo Index: test/CodeGen/X86/shift-pair.ll =================================================================== --- test/CodeGen/X86/shift-pair.ll +++ test/CodeGen/X86/shift-pair.ll @@ -4,9 +4,9 @@ define i64 @test(i64 %A) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $54, %rdi -; CHECK-NEXT: andl $-4, %edi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $54, %rax +; CHECK-NEXT: andl $-4, %eax ; CHECK-NEXT: retq %B = lshr i64 %A, 56 %C = shl i64 %B, 2 Index: test/CodeGen/X86/shuffle-of-insert.ll =================================================================== --- test/CodeGen/X86/shuffle-of-insert.ll +++ test/CodeGen/X86/shuffle-of-insert.ll @@ -6,15 +6,15 @@ define <4 x i32> @ins_elt_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_0: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_0: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $0, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $0, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_0: @@ -36,8 +36,8 @@ ; ; SSE4-LABEL: ins_elt_1: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $1, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $1, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_1: @@ -54,16 +54,16 @@ define <4 x i32> @ins_elt_2_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_2_commute: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_2_commute: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $2, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $2, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_2_commute: @@ -78,16 +78,16 @@ define <4 x i32> @ins_elt_3_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_3_commute: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_3_commute: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $3, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $3, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_3_commute: @@ -104,16 +104,16 @@ define <4 x i32> @ins_elt_0_to_2(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_0_to_2: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_0_to_2: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $2, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $2, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_0_to_2: @@ -128,15 +128,15 @@ define <4 x i32> @ins_elt_1_to_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_1_to_0: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_1_to_0: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $0, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $0, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_1_to_0: @@ -151,16 +151,16 @@ define <4 x i32> @ins_elt_2_to_3(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_2_to_3: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_2_to_3: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $3, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $3, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_2_to_3: @@ -182,8 +182,8 @@ ; ; SSE4-LABEL: ins_elt_3_to_1: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $1, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $1, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_3_to_1: Index: test/CodeGen/X86/sret-implicit.ll =================================================================== --- test/CodeGen/X86/sret-implicit.ll +++ test/CodeGen/X86/sret-implicit.ll @@ -10,7 +10,7 @@ } ; X64-LABEL: sret_void -; X64-DAG: movl $0, (%rdi) +; X64-DAG: movl $0, (%rax) ; X64-DAG: movq %rdi, %rax ; X64: retq @@ -24,7 +24,7 @@ } ; X64-LABEL: sret_demoted -; X64-DAG: movq $0, (%rdi) +; X64-DAG: movq $0, (%rax) ; X64-DAG: movq %rdi, %rax ; X64: retq Index: test/CodeGen/X86/sse1.ll =================================================================== --- test/CodeGen/X86/sse1.ll +++ test/CodeGen/X86/sse1.ll @@ -204,26 +204,27 @@ ; ; X64-LABEL: PR30512: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edi, %edi ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: sete %dil +; X64-NEXT: negl %edi +; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: xorl %edi, %edi ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: sete %dil +; X64-NEXT: negl %edi +; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: sete %cl +; X64-NEXT: negl %ecx +; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpl %r9d, %esi -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: sete %cl +; X64-NEXT: negl %ecx +; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] @@ -232,8 +233,7 @@ ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; X64-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; X64-NEXT: andps {{.*}}(%rip), %xmm2 -; X64-NEXT: movaps %xmm2, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movaps %xmm2, (%rax) ; X64-NEXT: retq %cmp = icmp eq <4 x i32> %x, %y %zext = zext <4 x i1> %cmp to <4 x i32> Index: test/CodeGen/X86/sse3-schedule.ll =================================================================== --- test/CodeGen/X86/sse3-schedule.ll +++ test/CodeGen/X86/sse3-schedule.ll @@ -473,8 +473,8 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; GENERIC-LABEL: test_monitor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: monitor # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -487,57 +487,57 @@ ; ; SLM-LABEL: test_monitor: ; SLM: # %bb.0: -; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50] +; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: monitor # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_monitor: ; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33] +; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: monitor # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_monitor: ; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: monitor # sched: [100:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_monitor: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: monitor # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: monitor # sched: [100:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_monitor: ; SKX: # %bb.0: -; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: monitor # sched: [100:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_monitor: ; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: monitor # sched: [100:0.17] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_monitor: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: monitor # sched: [100:?] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2) @@ -782,71 +782,71 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; GENERIC-LABEL: test_mwait: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: mwait # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mwait: ; ATOM: # %bb.0: -; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50] +; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: mwait # sched: [46:23.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_mwait: ; SLM: # %bb.0: -; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50] +; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: mwait # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_mwait: ; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33] +; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: mwait # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mwait: ; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: mwait # sched: [20:2.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_mwait: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: mwait # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: mwait # sched: [20:2.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mwait: ; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: mwait # sched: [20:2.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mwait: ; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50] +; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] ; BTVER2-NEXT: mwait # sched: [100:0.17] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_mwait: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: mwait # sched: [100:?] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1) Index: test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll @@ -17,8 +17,8 @@ define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ ; X64-LABEL: test_mm_crc64_u64: ; X64: # %bb.0: -; X64-NEXT: crc32q %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: crc32q %rsi, %rax ; X64-NEXT: retq %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) ret i64 %res Index: test/CodeGen/X86/sse42-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -19,12 +19,12 @@ ; ; X64-LABEL: test_mm_cmpestra: ; X64: # %bb.0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seta %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seta %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -48,12 +48,12 @@ ; ; X64-LABEL: test_mm_cmpestrc: ; X64: # %bb.0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: setb %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: setb %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -73,8 +73,8 @@ ; ; X64-LABEL: test_mm_cmpestri: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq @@ -95,8 +95,8 @@ ; ; X64-LABEL: test_mm_cmpestrm: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestrm $7, %xmm1, %xmm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> @@ -122,12 +122,12 @@ ; ; X64-LABEL: test_mm_cmpestro: ; X64: # %bb.0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seto %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seto %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -151,12 +151,12 @@ ; ; X64-LABEL: test_mm_cmpestrs: ; X64: # %bb.0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sets %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sets %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -180,12 +180,12 @@ ; ; X64-LABEL: test_mm_cmpestrz: ; X64: # %bb.0: -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sete %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sete %sil +; X64-NEXT: movl %esi, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -360,8 +360,8 @@ ; ; X64-LABEL: test_mm_crc32_u8: ; X64: # %bb.0: -; X64-NEXT: crc32b %sil, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32b %sil, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) ret i32 %res @@ -377,8 +377,8 @@ ; ; X64-LABEL: test_mm_crc32_u16: ; X64: # %bb.0: -; X64-NEXT: crc32w %si, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32w %si, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) ret i32 %res @@ -394,8 +394,8 @@ ; ; X64-LABEL: test_mm_crc32_u32: ; X64: # %bb.0: -; X64-NEXT: crc32l %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32l %esi, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) ret i32 %res Index: test/CodeGen/X86/sse42-intrinsics-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-x86_64.ll @@ -9,8 +9,8 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { ; CHECK-LABEL: crc32_64_8: ; CHECK: ## %bb.0: -; CHECK-NEXT: crc32b %sil, %edi ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) ret i64 %tmp @@ -19,8 +19,8 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: crc32_64_64: ; CHECK: ## %bb.0: -; CHECK-NEXT: crc32q %rsi, %rdi ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp Index: test/CodeGen/X86/sse42-schedule.ll =================================================================== --- test/CodeGen/X86/sse42-schedule.ll +++ test/CodeGen/X86/sse42-schedule.ll @@ -13,65 +13,65 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; GENERIC-LABEL: crc32_32_8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_8: ; SLM: # %bb.0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_8: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_32_8: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_8: ; SKX: # %bb.0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_8: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -83,65 +83,65 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; GENERIC-LABEL: crc32_32_16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32w %si, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32w %si, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32w (%rdx), %eax # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_16: ; SLM: # %bb.0: -; SLM-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SLM-NEXT: crc32w (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SLM-NEXT: crc32w (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_16: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32w (%rdx), %eax # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_32_16: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_16: ; SKX: # %bb.0: -; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKX-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: crc32w %si, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_16: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32w (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32w %si, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32w (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) %2 = load i16, i16 *%a2 @@ -153,65 +153,65 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: crc32_32_32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_32: ; SLM: # %bb.0: -; SLM-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SLM-NEXT: crc32l (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SLM-NEXT: crc32l (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_32_32: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32l (%rdx), %eax # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_32_32: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_32: ; SKX: # %bb.0: -; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKX-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_32_32: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32l (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32l (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) %2 = load i32, i32 *%a2 @@ -223,65 +223,65 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; GENERIC-LABEL: crc32_64_8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_8: ; SLM: # %bb.0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_64_8: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_64_8: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_8: ; SKX: # %bb.0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_64_8: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -293,65 +293,65 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: crc32_64_64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; GENERIC-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_64: ; SLM: # %bb.0: -; SLM-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SLM-NEXT: crc32q (%rdx), %rdi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SLM-NEXT: crc32q (%rdx), %rax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: crc32_64_64: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SANDY-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; HASWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_64_64: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_64: ; SKX: # %bb.0: -; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKX-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BTVER2-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; BTVER2-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: crc32_64_64: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; ZNVER1-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; ZNVER1-NEXT: crc32q (%rdx), %rax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) %2 = load i64, i64 *%a2 Index: test/CodeGen/X86/subcarry.ll =================================================================== --- test/CodeGen/X86/subcarry.ll +++ test/CodeGen/X86/subcarry.ll @@ -6,23 +6,23 @@ define %S @negate(%S* nocapture readonly %this) { ; CHECK-LABEL: negate: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq (%rsi), %rax -; CHECK-NEXT: movq 8(%rsi), %rcx -; CHECK-NEXT: notq %rax -; CHECK-NEXT: addq $1, %rax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq (%rsi), %rcx +; CHECK-NEXT: movq 8(%rsi), %rdx ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: adcq $0, %rcx -; CHECK-NEXT: movq 16(%rsi), %rdx +; CHECK-NEXT: addq $1, %rcx ; CHECK-NEXT: notq %rdx ; CHECK-NEXT: adcq $0, %rdx +; CHECK-NEXT: movq 16(%rsi), %rdi +; CHECK-NEXT: notq %rdi +; CHECK-NEXT: adcq $0, %rdi ; CHECK-NEXT: movq 24(%rsi), %rsi ; CHECK-NEXT: notq %rsi ; CHECK-NEXT: adcq $0, %rsi -; CHECK-NEXT: movq %rax, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %rdx, 16(%rdi) -; CHECK-NEXT: movq %rsi, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: movq %rdx, 8(%rax) +; CHECK-NEXT: movq %rdi, 16(%rax) +; CHECK-NEXT: movq %rsi, 24(%rax) ; CHECK-NEXT: retq entry: %0 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0 @@ -63,29 +63,29 @@ define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: notq %rdx -; CHECK-NEXT: xorl %r10d, %r10d +; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: setb %r10b +; CHECK-NEXT: setb %dil ; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: adcq 8(%rsi), %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %r11d +; CHECK-NEXT: adcq 8(%rsi), %rdi +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %r10d ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: addq %r10, %rcx -; CHECK-NEXT: adcq 16(%rsi), %r11 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: adcq 16(%rsi), %r10 +; CHECK-NEXT: setb %dil +; CHECK-NEXT: movzbl %dil, %edi ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %r11, %r8 -; CHECK-NEXT: adcq 24(%rsi), %rax +; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: adcq 24(%rsi), %rdi ; CHECK-NEXT: notq %r9 -; CHECK-NEXT: addq %rax, %r9 -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r8, 16(%rdi) -; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: addq %rdi, %r9 +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/swift-return.ll =================================================================== --- test/CodeGen/X86/swift-return.ll +++ test/CodeGen/X86/swift-return.ll @@ -397,9 +397,9 @@ ; CHECK-LABEL: gen7: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: movl %edi, %r8d +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %eax, %r8d ; CHECK-NEXT: retq ; ; CHECK-O0-LABEL: gen7: @@ -420,9 +420,9 @@ ; CHECK-LABEL: gen8: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movq %rdi, %rdx -; CHECK-NEXT: movq %rdi, %rcx -; CHECK-NEXT: movq %rdi, %r8 +; CHECK-NEXT: movq %rax, %rdx +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: movq %rax, %r8 ; CHECK-NEXT: retq ; ; CHECK-O0-LABEL: gen8: @@ -443,9 +443,9 @@ ; CHECK-LABEL: gen9: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: movl %edi, %r8d +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %eax, %r8d ; CHECK-NEXT: retq ; ; CHECK-O0-LABEL: gen9: @@ -465,13 +465,13 @@ define swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen10(double %keyd, i64 %keyi) { ; CHECK-LABEL: gen10: ; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movaps %xmm0, %xmm1 ; CHECK-NEXT: movaps %xmm0, %xmm2 ; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movq %rdi, %rdx -; CHECK-NEXT: movq %rdi, %rcx -; CHECK-NEXT: movq %rdi, %r8 +; CHECK-NEXT: movq %rax, %rdx +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: movq %rax, %r8 ; CHECK-NEXT: retq ; ; CHECK-O0-LABEL: gen10: Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -34,11 +34,11 @@ ; CHECK-APPLE-LABEL: caller: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller: @@ -247,12 +247,12 @@ ; CHECK-APPLE: movl $1, %esi ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo_sret -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12), +; CHECK-APPLE: movb 8(%rdi), ; CHECK-APPLE: movb %{{.*}}, -; CHECK-APPLE: movq %r12, %rdi ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -297,21 +297,21 @@ ; The first swifterror value: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; The second swifterror value: ; CHECK-APPLE: xorl %r12d, %r12d ; CHECK-APPLE: callq {{.*}}foo -; CHECK-APPLE: testq %r12, %r12 +; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: testq %rdi, %rdi ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values: @@ -488,8 +488,8 @@ ; CHECK-i386: retl ; CHECK-APPLE-LABEL: empty_swiftcc: ; CHECK-APPLE: movl %edx, %ecx -; CHECK-APPLE: movl %edi, %eax -; CHECK-APPLE: movl %esi, %edx +; CHECK-APPLE-DAG: movl %edi, %eax +; CHECK-APPLE-DAG: movl %esi, %edx ; CHECK-APPLE: retq define swiftcc {i32, i32, i32} @empty_swiftcc({i32, i32, i32} , %swift_error** swifterror %error_ptr_ref) { entry: Index: test/CodeGen/X86/system-intrinsics-xsetbv.ll =================================================================== --- test/CodeGen/X86/system-intrinsics-xsetbv.ll +++ test/CodeGen/X86/system-intrinsics-xsetbv.ll @@ -11,8 +11,8 @@ ; CHECK64-LABEL: test_xsetbv ; CHECK64: movl %edx, %eax -; CHECK64: movl %edi, %ecx -; CHECK64: movl %esi, %edx +; CHECK64-DAG: movl %edi, %ecx +; CHECK64-DAG: movl %esi, %edx ; CHECK64: xsetbv ; CHECK64: ret Index: test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll @@ -40,10 +40,10 @@ ; X64-LABEL: test__blcic_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -89,10 +89,10 @@ ; X64-LABEL: test__blsic_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 @@ -104,10 +104,10 @@ ; X64-LABEL: test__t1mskc_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -119,10 +119,10 @@ ; X64-LABEL: test__tzmsk_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 Index: test/CodeGen/X86/tbm-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel.ll @@ -72,10 +72,10 @@ ; X64-LABEL: test__blcic_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -154,10 +154,10 @@ ; X64-LABEL: test__blsic_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 @@ -178,10 +178,10 @@ ; X64-LABEL: test__t1mskc_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -202,10 +202,10 @@ ; X64-LABEL: test__tzmsk_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 Index: test/CodeGen/X86/tbm_patterns.ll =================================================================== --- test/CodeGen/X86/tbm_patterns.ll +++ test/CodeGen/X86/tbm_patterns.ll @@ -52,10 +52,10 @@ define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovnel %edx, %esi -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 %t1 = and i32 %t0, 4095 @@ -113,10 +113,10 @@ define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2: ; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovneq %rdx, %rsi -; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 %t1 = and i64 %t0, 4095 @@ -151,11 +151,11 @@ define i32 @test_x86_tbm_blcfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: testl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = and i32 %t0, %a @@ -190,10 +190,10 @@ define i64 @test_x86_tbm_blcfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = and i64 %t0, %a @@ -230,12 +230,12 @@ define i32 @test_x86_tbm_blci_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 1, %a %t1 = xor i32 %t0, -1 @@ -273,11 +273,11 @@ define i64 @test_x86_tbm_blci_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 1, %a %t1 = xor i64 %t0, -1 @@ -335,12 +335,12 @@ define i32 @test_x86_tbm_blcic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: testl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; CHECK-NEXT: testl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -378,12 +378,12 @@ define i64 @test_x86_tbm_blcic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: testq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: testq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -419,11 +419,11 @@ define i32 @test_x86_tbm_blcmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: xorl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: xorl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = xor i32 %t0, %a @@ -458,10 +458,10 @@ define i64 @test_x86_tbm_blcmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: xorq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: xorq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = xor i64 %t0, %a @@ -496,11 +496,11 @@ define i32 @test_x86_tbm_blcs_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = or i32 %t0, %a @@ -535,10 +535,10 @@ define i64 @test_x86_tbm_blcs_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = or i64 %t0, %a @@ -573,11 +573,11 @@ define i32 @test_x86_tbm_blsfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, -1 %t1 = or i32 %t0, %a @@ -612,10 +612,10 @@ define i64 @test_x86_tbm_blsfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq -1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq -1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, -1 %t1 = or i64 %t0, %a @@ -652,12 +652,12 @@ define i32 @test_x86_tbm_blsic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -695,12 +695,12 @@ define i64 @test_x86_tbm_blsic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 @@ -739,12 +739,12 @@ define i32 @test_x86_tbm_t1mskc_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -783,12 +783,12 @@ define i64 @test_x86_tbm_t1mskc_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -827,12 +827,12 @@ define i32 @test_x86_tbm_tzmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: testl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -871,12 +871,12 @@ define i64 @test_x86_tbm_tzmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 Index: test/CodeGen/X86/trunc-subvector.ll =================================================================== --- test/CodeGen/X86/trunc-subvector.ll +++ test/CodeGen/X86/trunc-subvector.ll @@ -41,9 +41,8 @@ ; SSE2-LABEL: test3: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; AVX2-LABEL: test3: @@ -167,9 +166,9 @@ define <2 x i32> @test8(<8 x i32> %v) { ; SSE2-LABEL: test8: ; SSE2: # %bb.0: -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; AVX2-LABEL: test8: Index: test/CodeGen/X86/twoaddr-lea.ll =================================================================== --- test/CodeGen/X86/twoaddr-lea.ll +++ test/CodeGen/X86/twoaddr-lea.ll @@ -11,8 +11,8 @@ define i32 @test1(i32 %X) nounwind { ; CHECK-LABEL: test1: -; CHECK-NOT: mov -; CHECK: leal 1(%rdi) +; CHECK: movl %edi, %eax +; CHECK: leal 1(%rax) %Z = add i32 %X, 1 store volatile i32 %Z, i32* @G ret i32 %X Index: test/CodeGen/X86/umul-with-overflow.ll =================================================================== --- test/CodeGen/X86/umul-with-overflow.ll +++ test/CodeGen/X86/umul-with-overflow.ll @@ -15,8 +15,8 @@ ; ; X64-LABEL: a: ; X64: # %bb.0: -; X64-NEXT: movl $3, %ecx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3, %ecx ; X64-NEXT: mull %ecx ; X64-NEXT: seto %al ; X64-NEXT: retq Index: test/CodeGen/X86/urem-power-of-two.ll =================================================================== --- test/CodeGen/X86/urem-power-of-two.ll +++ test/CodeGen/X86/urem-power-of-two.ll @@ -14,8 +14,8 @@ ; ; X64-LABEL: const_pow_2: ; X64: # %bb.0: -; X64-NEXT: andl $31, %edi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andl $31, %eax ; X64-NEXT: retq %urem = urem i64 %x, 32 ret i64 %urem @@ -35,8 +35,9 @@ ; ; X64-LABEL: shift_left_pow_2: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: addl $33554431, %eax # imm = 0x1FFFFFF ; X64-NEXT: andl %edi, %eax @@ -61,8 +62,9 @@ ; ; X64-LABEL: shift_right_pow_2: ; X64: # %bb.0: -; X64-NEXT: movl $32768, %eax # imm = 0x8000 ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $32768, %eax # imm = 0x8000 +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrl %cl, %eax ; X64-NEXT: decl %eax ; X64-NEXT: andl %edi, %eax Index: test/CodeGen/X86/use-add-flags.ll =================================================================== --- test/CodeGen/X86/use-add-flags.ll +++ test/CodeGen/X86/use-add-flags.ll @@ -10,16 +10,16 @@ define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { ; LNX-LABEL: test1: ; LNX: # %bb.0: -; LNX-NEXT: addl (%rdi), %esi -; LNX-NEXT: cmovnsl %ecx, %edx ; LNX-NEXT: movl %edx, %eax +; LNX-NEXT: addl (%rdi), %esi +; LNX-NEXT: cmovnsl %ecx, %eax ; LNX-NEXT: retq ; ; WIN-LABEL: test1: ; WIN: # %bb.0: -; WIN-NEXT: addl (%rcx), %edx -; WIN-NEXT: cmovnsl %r9d, %r8d ; WIN-NEXT: movl %r8d, %eax +; WIN-NEXT: addl (%rcx), %edx +; WIN-NEXT: cmovnsl %r9d, %eax ; WIN-NEXT: retq %tmp2 = load i32, i32* %x, align 4 ; [#uses=1] %tmp4 = add i32 %tmp2, %y ; [#uses=1] Index: test/CodeGen/X86/vector-bitreverse.ll =================================================================== --- test/CodeGen/X86/vector-bitreverse.ll +++ test/CodeGen/X86/vector-bitreverse.ll @@ -14,38 +14,40 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; SSE-LABEL: test_bitreverse_i8: ; SSE: # %bb.0: -; SSE-NEXT: rolb $4, %dil -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $51, %al -; SSE-NEXT: shlb $2, %al -; SSE-NEXT: andb $-52, %dil -; SSE-NEXT: shrb $2, %dil -; SSE-NEXT: orb %al, %dil -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $85, %al -; SSE-NEXT: addb %al, %al -; SSE-NEXT: andb $-86, %dil -; SSE-NEXT: shrb %dil -; SSE-NEXT: orb %al, %dil ; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: rolb $4, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $51, %cl +; SSE-NEXT: shlb $2, %cl +; SSE-NEXT: andb $-52, %al +; SSE-NEXT: shrb $2, %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $85, %cl +; SSE-NEXT: addb %cl, %cl +; SSE-NEXT: andb $-86, %al +; SSE-NEXT: shrb %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_bitreverse_i8: ; AVX: # %bb.0: -; AVX-NEXT: rolb $4, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $51, %al -; AVX-NEXT: shlb $2, %al -; AVX-NEXT: andb $-52, %dil -; AVX-NEXT: shrb $2, %dil -; AVX-NEXT: orb %al, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $85, %al -; AVX-NEXT: addb %al, %al -; AVX-NEXT: andb $-86, %dil -; AVX-NEXT: shrb %dil -; AVX-NEXT: orb %al, %dil ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: rolb $4, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $51, %cl +; AVX-NEXT: shlb $2, %cl +; AVX-NEXT: andb $-52, %al +; AVX-NEXT: shrb $2, %al +; AVX-NEXT: orb %cl, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $85, %cl +; AVX-NEXT: addb %cl, %cl +; AVX-NEXT: andb $-86, %al +; AVX-NEXT: shrb %al +; AVX-NEXT: orb %cl, %al +; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; ; XOP-LABEL: test_bitreverse_i8: Index: test/CodeGen/X86/vector-blend.ll =================================================================== --- test/CodeGen/X86/vector-blend.ll +++ test/CodeGen/X86/vector-blend.ll @@ -338,30 +338,30 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { ; SSE2-LABEL: vsel_double8: ; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_double8: ; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 +; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_double8: ; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] ; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_double8: @@ -377,30 +377,30 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { ; SSE2-LABEL: vsel_i648: ; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_i648: ; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 +; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_i648: ; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] ; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_i648: @@ -528,22 +528,22 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { ; SSE2-LABEL: constant_blendvpd_avx: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movapd %xmm3, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: constant_blendvpd_avx: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movaps %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movapd %xmm3, %xmm1 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: constant_blendvpd_avx: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: constant_blendvpd_avx: @@ -740,20 +740,20 @@ define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) { ; SSE2-LABEL: blend_shufflevector_4xi64: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: blend_shufflevector_4xi64: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: blend_shufflevector_4xi64: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: blend_shufflevector_4xi64: Index: test/CodeGen/X86/vector-compare-results.ll =================================================================== --- test/CodeGen/X86/vector-compare-results.ll +++ test/CodeGen/X86/vector-compare-results.ll @@ -344,254 +344,254 @@ define <32 x i1> @test_cmp_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i8: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i8: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE42-NEXT: pcmpgtb %xmm3, %xmm1 -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: pextrb $1, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm0, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm0, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: pextrb $5, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm0, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $6, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm0, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i8: @@ -933,6 +933,7 @@ define <32 x i1> @test_cmp_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i16: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -940,253 +941,252 @@ ; SSE2-NEXT: pcmpgtw %xmm6, %xmm2 ; SSE2-NEXT: packsswb %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i16: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE42-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE42-NEXT: pcmpgtw %xmm7, %xmm3 ; SSE42-NEXT: pcmpgtw %xmm6, %xmm2 -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: pextrb $2, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: pextrb $0, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: pextrb $10, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: pextrb $12, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: pextrb $10, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: pextrb $12, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i16: @@ -1247,500 +1247,501 @@ define <64 x i1> @test_cmp_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i8: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb %xmm4, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtb %xmm6, %xmm2 ; SSE2-NEXT: pcmpgtb %xmm7, %xmm3 ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 6(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 6(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 4(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 4(%rax) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v64i8: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb %xmm4, %xmm0 ; SSE42-NEXT: pcmpgtb %xmm5, %xmm1 ; SSE42-NEXT: pcmpgtb %xmm6, %xmm2 ; SSE42-NEXT: pcmpgtb %xmm7, %xmm3 -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm3, %ecx +; SSE42-NEXT: pextrb $1, %xmm3, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: pextrb $5, %xmm3, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm2, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm2, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm2, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm2, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm2, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 6(%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm1, %edx +; SSE42-NEXT: pextrb $6, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm2, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 4(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm1, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm0, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm0, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm0, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm0, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm0, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v64i8: ; AVX1: # %bb.0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4 @@ -1749,509 +1750,508 @@ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm1, %edx +; AVX1-NEXT: vpextrb $0, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm1, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm1, %ecx +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm1, %edx +; AVX1-NEXT: vpextrb $6, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm2, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm2, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm2, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm2, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm2, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm2, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm2, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm2, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm0, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm0, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm0, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm0, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm0, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm4, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm2, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: vpextrb $0, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm4, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm4, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm4, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm4, %ecx +; AVX1-NEXT: vpextrb $5, %xmm0, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm4, %edx +; AVX1-NEXT: vpextrb $6, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm4, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm4, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm4, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm4, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v64i8: ; AVX2: # %bb.0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: vpextrb $0, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $3, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm1, %edx +; AVX2-NEXT: vpextrb $0, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $3, %xmm1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: vpextrb $0, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $3, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx +; AVX2-NEXT: vpextrb $6, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm0, %edx +; AVX2-NEXT: vpextrb $0, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $3, %xmm0, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2394,6 +2394,7 @@ define <32 x i1> @test_cmp_v32f32(<32 x float> %a0, <32 x float> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10 @@ -2417,130 +2418,130 @@ ; SSE2-NEXT: packssdw %xmm11, %xmm9 ; SSE2-NEXT: packsswb %xmm10, %xmm9 ; SSE2-NEXT: movdqa %xmm9, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f32: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm15 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm13 @@ -2557,125 +2558,124 @@ ; SSE42-NEXT: cmpltps %xmm6, %xmm13 ; SSE42-NEXT: cmpltps %xmm5, %xmm14 ; SSE42-NEXT: cmpltps %xmm4, %xmm15 -; SSE42-NEXT: pextrb $4, %xmm15, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm15, %ecx +; SSE42-NEXT: pextrb $4, %xmm15, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm15, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm15, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm14, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm14, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm14, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm14, %edx +; SSE42-NEXT: pextrb $0, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm13, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm13, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm13, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm12, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm12, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm12, %ecx +; SSE42-NEXT: pextrb $4, %xmm14, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm12, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm11, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm10, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm10, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm10, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm10, %edx +; SSE42-NEXT: pextrb $8, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm9, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm9, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm14, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm13, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm9, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm9, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm13, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm13, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm8, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm8, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm13, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm12, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm12, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm12, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm12, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm11, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm11, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm11, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm11, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm10, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm8, %ecx +; SSE42-NEXT: pextrb $4, %xmm10, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm8, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $8, %xmm10, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm10, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm9, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm9, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm9, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm9, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm8, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm8, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm8, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm8, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32f32: @@ -2945,6 +2945,7 @@ define <32 x i1> @test_cmp_v32i32(<32 x i32> %a0, <32 x i32> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 ; SSE2-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 ; SSE2-NEXT: packssdw %xmm3, %xmm2 @@ -2960,130 +2961,130 @@ ; SSE2-NEXT: packssdw %xmm5, %xmm4 ; SSE2-NEXT: packsswb %xmm6, %xmm4 ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i32: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm1 @@ -3092,125 +3093,124 @@ ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm4 -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: pextrb $4, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm5, %edx +; SSE42-NEXT: pextrb $0, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm6, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm6, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: pextrb $4, %xmm5, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: pextrb $8, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm2, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm2, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i32: @@ -3291,6 +3291,7 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i16: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -3304,250 +3305,250 @@ ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: packsswb %xmm7, %xmm6 ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 6(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 6(%rax) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 4(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 4(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v64i16: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm3 @@ -3556,247 +3557,247 @@ ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm4 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 -; SSE42-NEXT: pextrb $2, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: pextrb $2, %xmm6, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm6, %edx +; SSE42-NEXT: pextrb $0, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm7, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: pextrb $10, %xmm6, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm4, %edx +; SSE42-NEXT: pextrb $12, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm5, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm5, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm5, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm5, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 6(%rax) +; SSE42-NEXT: pextrb $2, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: pextrb $10, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: pextrb $12, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm5, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 4(%rax) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: pextrb $10, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: pextrb $12, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: pextrb $10, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $12, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v64i16: ; AVX1: # %bb.0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9 ; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8 @@ -3813,258 +3814,258 @@ ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm7, %xmm7 ; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm2 -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $4, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $6, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $8, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $12, %xmm2, %ecx +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $14, %xmm2, %edx +; AVX1-NEXT: vpextrb $0, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm7, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm7, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $4, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm7, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm7, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $6, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm7, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm7, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $8, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm7, %ecx +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm7, %edx +; AVX1-NEXT: vpextrb $12, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm3, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $14, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm7, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm3, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm7, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm3, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm7, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm3, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm7, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm4, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm4, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm4, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm4, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $4, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $6, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $8, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $12, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $14, %xmm0, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm5, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm5, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm5, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm5, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm9, %ecx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm4, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 4(%rax) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm9, %edx +; AVX1-NEXT: vpextrb $0, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm9, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $4, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm9, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $6, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm9, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $8, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm8, %ecx +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm8, %edx +; AVX1-NEXT: vpextrb $12, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm8, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $14, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm8, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm8, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm8, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v64i16: ; AVX2: # %bb.0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm5 ; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm1 ; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm4 @@ -4073,253 +4074,252 @@ ; AVX2-NEXT: vextracti128 $1, %ymm7, %xmm3 ; AVX2-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm6 ; AVX2-NEXT: vextracti128 $1, %ymm6, %xmm2 -; AVX2-NEXT: vpextrb $2, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $4, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $6, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $8, %xmm6, %ecx +; AVX2-NEXT: vpextrb $2, %xmm6, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $10, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $12, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $14, %xmm6, %edx +; AVX2-NEXT: vpextrb $0, %xmm6, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm2, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $4, %xmm6, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm2, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $6, %xmm6, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm2, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $8, %xmm6, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm2, %ecx +; AVX2-NEXT: vpextrb $10, %xmm6, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm2, %edx +; AVX2-NEXT: vpextrb $12, %xmm6, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm7, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $14, %xmm6, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm7, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm7, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm7, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm3, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm7, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm3, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm7, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm3, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm7, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm3, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $4, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $6, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $8, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $10, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $12, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $14, %xmm4, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm7, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm0, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm0, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm0, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm0, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm5, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm3, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 4(%rax) +; AVX2-NEXT: vpextrb $2, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: vpextrb $0, %xmm4, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm5, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $4, %xmm4, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm5, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $6, %xmm4, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm5, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $8, %xmm4, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: vpextrb $10, %xmm4, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm1, %edx +; AVX2-NEXT: vpextrb $12, %xmm4, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm1, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $14, %xmm4, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm1, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm1, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -4372,6 +4372,7 @@ ; SSE2-LABEL: test_cmp_v128i8: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rax +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -4381,491 +4382,491 @@ ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 ; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 14(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 14(%rax) ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 12(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 12(%rax) ; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 10(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 10(%rax) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 8(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 8(%rax) ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 6(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 6(%rax) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 4(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 4(%rax) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v128i8: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -4874,483 +4875,483 @@ ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $1, %xmm7, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm7, %ecx +; SSE42-NEXT: pextrb $1, %xmm7, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm7, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm7, %edx +; SSE42-NEXT: pextrb $0, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm7, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm7, %ecx +; SSE42-NEXT: pextrb $5, %xmm7, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm7, %edx +; SSE42-NEXT: pextrb $6, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm7, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 14(%rdi) -; SSE42-NEXT: pextrb $1, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm6, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm6, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm6, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm6, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 14(%rax) +; SSE42-NEXT: pextrb $1, %xmm6, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm6, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 12(%rdi) -; SSE42-NEXT: pextrb $1, %xmm5, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm5, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm5, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm5, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm5, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: pextrb $5, %xmm6, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm5, %edx +; SSE42-NEXT: pextrb $6, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm5, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 10(%rdi) -; SSE42-NEXT: pextrb $1, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm4, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm4, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm4, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm4, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm4, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 8(%rdi) -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm3, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm6, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 12(%rax) +; SSE42-NEXT: pextrb $1, %xmm5, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: pextrb $5, %xmm5, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm2, %edx +; SSE42-NEXT: pextrb $6, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm2, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm2, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm2, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm2, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm5, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 10(%rax) +; SSE42-NEXT: pextrb $1, %xmm4, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm1, %edx +; SSE42-NEXT: pextrb $6, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm4, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 8(%rax) +; SSE42-NEXT: pextrb $1, %xmm3, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm3, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 6(%rax) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: pextrb $5, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm2, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 4(%rax) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: pextrb $5, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: pextrb $5, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm0, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $6, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm0, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v128i8: ; AVX1: # %bb.0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9 ; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8 @@ -5367,1010 +5368,1010 @@ ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm6, %xmm6 ; AVX1-NEXT: vpcmpgtb %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpextrb $1, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm3, %ecx +; AVX1-NEXT: vpextrb $1, %xmm3, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm3, %edx +; AVX1-NEXT: vpextrb $0, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm3, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm3, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm3, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm3, %ecx +; AVX1-NEXT: vpextrb $5, %xmm3, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm3, %edx +; AVX1-NEXT: vpextrb $6, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm6, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm6, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm6, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm6, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm6, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm6, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm6, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm6, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 12(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm2, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm2, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm2, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm2, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm2, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm6, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm5, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm6, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 12(%rax) +; AVX1-NEXT: vpextrb $1, %xmm2, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: vpextrb $0, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm5, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm5, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm5, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm5, %ecx +; AVX1-NEXT: vpextrb $5, %xmm2, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm5, %edx +; AVX1-NEXT: vpextrb $6, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm5, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm5, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm5, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 8(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm4, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm4, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm4, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm4, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm4, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm5, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm4, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm5, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 8(%rax) +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: vpextrb $0, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm4, %ecx +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm4, %edx +; AVX1-NEXT: vpextrb $6, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm4, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 4(%rax) +; AVX1-NEXT: vpextrb $1, %xmm9, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: vpextrb $0, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm9, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm4, %ecx +; AVX1-NEXT: vpextrb $5, %xmm9, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm4, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm9, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm9, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm9, %edx +; AVX1-NEXT: vpextrb $6, %xmm9, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm9, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm9, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm9, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm9, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm9, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm9, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm9, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm9, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm8, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm8, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm8, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm8, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm8, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm8, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm8, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm8, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm8, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm8, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm8, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm8, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm8, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm8, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm8, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm8, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, (%rax) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v128i8: ; AVX2: # %bb.0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 -; AVX2-NEXT: vpextrb $1, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm3, %ecx +; AVX2-NEXT: vpextrb $1, %xmm3, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm3, %edx +; AVX2-NEXT: vpextrb $0, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm3, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm3, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $3, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm3, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: vpextrb $5, %xmm3, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm3, %edx +; AVX2-NEXT: vpextrb $6, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm3 -; AVX2-NEXT: vpextrb $0, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm3, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $3, %xmm3, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm3, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm3, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm3, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm3 +; AVX2-NEXT: vpextrb $0, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm3, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm3, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm3, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 12(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm2, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm2, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm2, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm2, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm2, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 -; AVX2-NEXT: vpextrb $0, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm2, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm3, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 12(%rax) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: vpextrb $0, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx ; AVX2-NEXT: vpextrb $3, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm2, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm2, %edx +; AVX2-NEXT: vpextrb $6, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm2, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm2, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm2, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm2, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 8(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 +; AVX2-NEXT: vpextrb $0, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm2, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 8(%rax) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: vpextrb $0, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx ; AVX2-NEXT: vpextrb $3, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX2-NEXT: vpextrb $0, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm0, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 4(%rax) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: vpextrb $0, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx ; AVX2-NEXT: vpextrb $3, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx +; AVX2-NEXT: vpextrb $6, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpextrb $0, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512F-LABEL: test_cmp_v128i8: ; AVX512F: # %bb.0: +; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 @@ -6395,20 +6396,20 @@ ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k7 -; AVX512F-NEXT: kmovw %k7, 14(%rdi) -; AVX512F-NEXT: kmovw %k6, 12(%rdi) -; AVX512F-NEXT: kmovw %k5, 10(%rdi) -; AVX512F-NEXT: kmovw %k4, 8(%rdi) -; AVX512F-NEXT: kmovw %k3, 6(%rdi) -; AVX512F-NEXT: kmovw %k2, 4(%rdi) -; AVX512F-NEXT: kmovw %k1, 2(%rdi) -; AVX512F-NEXT: kmovw %k0, (%rdi) -; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: kmovw %k7, 14(%rax) +; AVX512F-NEXT: kmovw %k6, 12(%rax) +; AVX512F-NEXT: kmovw %k5, 10(%rax) +; AVX512F-NEXT: kmovw %k4, 8(%rax) +; AVX512F-NEXT: kmovw %k3, 6(%rax) +; AVX512F-NEXT: kmovw %k2, 4(%rax) +; AVX512F-NEXT: kmovw %k1, 2(%rax) +; AVX512F-NEXT: kmovw %k0, (%rax) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v128i8: ; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512DQ-NEXT: vpmovd2m %zmm4, %k0 @@ -6433,15 +6434,14 @@ ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k7 -; AVX512DQ-NEXT: kmovw %k7, 14(%rdi) -; AVX512DQ-NEXT: kmovw %k6, 12(%rdi) -; AVX512DQ-NEXT: kmovw %k5, 10(%rdi) -; AVX512DQ-NEXT: kmovw %k4, 8(%rdi) -; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) -; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) -; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) -; AVX512DQ-NEXT: kmovw %k0, (%rdi) -; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: kmovw %k7, 14(%rax) +; AVX512DQ-NEXT: kmovw %k6, 12(%rax) +; AVX512DQ-NEXT: kmovw %k5, 10(%rax) +; AVX512DQ-NEXT: kmovw %k4, 8(%rax) +; AVX512DQ-NEXT: kmovw %k3, 6(%rax) +; AVX512DQ-NEXT: kmovw %k2, 4(%rax) +; AVX512DQ-NEXT: kmovw %k1, 2(%rax) +; AVX512DQ-NEXT: kmovw %k0, (%rax) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -6463,6 +6463,7 @@ define <32 x i1> @test_cmp_v32f64(<32 x double> %a0, <32 x double> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32f64: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: cmpltpd %xmm7, %xmm8 ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 @@ -6510,126 +6511,125 @@ ; SSE2-NEXT: packssdw %xmm4, %xmm0 ; SSE2-NEXT: packsswb %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f64: @@ -6675,24 +6675,24 @@ ; SSE42-NEXT: pextrb $8, %xmm0, %r9d ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 -; SSE42-NEXT: pextrb $0, %xmm0, %esi +; SSE42-NEXT: pextrb $0, %xmm0, %edx ; SSE42-NEXT: pextrb $8, %xmm0, %r12d ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 -; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %esi ; SSE42-NEXT: pextrb $8, %xmm0, %ebx ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 -; SSE42-NEXT: pextrb $0, %xmm0, %eax +; SSE42-NEXT: pextrb $0, %xmm0, %ecx ; SSE42-NEXT: pextrb $8, %xmm0, %r13d ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: andl $1, %r8d ; SSE42-NEXT: andl $1, %r10d -; SSE42-NEXT: leal (%r10,%r8,2), %ecx +; SSE42-NEXT: leal (%r10,%r8,2), %eax ; SSE42-NEXT: andl $1, %ebp -; SSE42-NEXT: leal (%rcx,%rbp,4), %r8d -; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: leal (%rax,%rbp,4), %r8d +; SSE42-NEXT: pextrb $0, %xmm0, %eax ; SSE42-NEXT: pextrb $8, %xmm0, %ebp ; SSE42-NEXT: andl $1, %edi ; SSE42-NEXT: leal (%r8,%rdi,8), %r8d @@ -6701,7 +6701,7 @@ ; SSE42-NEXT: orl %r8d, %r15d ; SSE42-NEXT: pextrb $8, %xmm1, %edi ; SSE42-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill -; SSE42-NEXT: pextrb $0, %xmm1, %r10d +; SSE42-NEXT: pextrb $0, %xmm1, %r8d ; SSE42-NEXT: andl $1, %r11d ; SSE42-NEXT: shll $5, %r11d ; SSE42-NEXT: orl %r15d, %r11d @@ -6710,93 +6710,93 @@ ; SSE42-NEXT: andl $1, %r9d ; SSE42-NEXT: shll $7, %r9d ; SSE42-NEXT: orl %r14d, %r9d -; SSE42-NEXT: pextrb $0, %xmm2, %r14d +; SSE42-NEXT: pextrb $0, %xmm2, %r10d ; SSE42-NEXT: pextrb $8, %xmm2, %edi ; SSE42-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill -; SSE42-NEXT: andl $1, %esi -; SSE42-NEXT: shll $8, %esi -; SSE42-NEXT: orl %r9d, %esi +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %r9d, %edx ; SSE42-NEXT: andl $1, %r12d ; SSE42-NEXT: shll $9, %r12d -; SSE42-NEXT: orl %esi, %r12d -; SSE42-NEXT: pextrb $0, %xmm3, %r8d -; SSE42-NEXT: pextrb $8, %xmm3, %r15d -; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $10, %edx -; SSE42-NEXT: orl %r12d, %edx +; SSE42-NEXT: orl %edx, %r12d +; SSE42-NEXT: pextrb $0, %xmm3, %edi +; SSE42-NEXT: pextrb $8, %xmm3, %r9d +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $10, %esi +; SSE42-NEXT: orl %r12d, %esi ; SSE42-NEXT: andl $1, %ebx ; SSE42-NEXT: shll $11, %ebx -; SSE42-NEXT: orl %edx, %ebx -; SSE42-NEXT: pextrb $0, %xmm4, %r12d -; SSE42-NEXT: pextrb $8, %xmm4, %edi -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $12, %eax -; SSE42-NEXT: orl %ebx, %eax +; SSE42-NEXT: orl %esi, %ebx +; SSE42-NEXT: pextrb $0, %xmm4, %r15d +; SSE42-NEXT: pextrb $8, %xmm4, %r12d +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: orl %ebx, %ecx ; SSE42-NEXT: andl $1, %r13d ; SSE42-NEXT: shll $13, %r13d -; SSE42-NEXT: orl %eax, %r13d -; SSE42-NEXT: pextrb $0, %xmm5, %eax +; SSE42-NEXT: orl %ecx, %r13d +; SSE42-NEXT: pextrb $0, %xmm5, %ecx ; SSE42-NEXT: pextrb $8, %xmm5, %ebx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %r13d, %ecx +; SSE42-NEXT: andl $1, %eax +; SSE42-NEXT: shll $14, %eax +; SSE42-NEXT: orl %r13d, %eax ; SSE42-NEXT: shll $15, %ebp -; SSE42-NEXT: orl %ecx, %ebp +; SSE42-NEXT: orl %eax, %ebp ; SSE42-NEXT: pextrb $0, %xmm6, %r13d -; SSE42-NEXT: pextrb $8, %xmm6, %edx +; SSE42-NEXT: pextrb $8, %xmm6, %esi ; SSE42-NEXT: orl %r11d, %ebp -; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; SSE42-NEXT: movw %bp, 2(%r9) +; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; SSE42-NEXT: movw %bp, 2(%r14) ; SSE42-NEXT: pextrb $0, %xmm7, %r11d -; SSE42-NEXT: pextrb $8, %xmm7, %ecx -; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %eax +; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: andl $1, %r8d +; SSE42-NEXT: leal (%r8,%rdx,2), %r8d ; SSE42-NEXT: andl $1, %r10d -; SSE42-NEXT: leal (%r10,%rsi,2), %esi -; SSE42-NEXT: andl $1, %r14d -; SSE42-NEXT: leal (%rsi,%r14,4), %r14d +; SSE42-NEXT: leal (%r8,%r10,4), %r8d ; SSE42-NEXT: pextrb $0, %xmm8, %r10d ; SSE42-NEXT: pextrb $8, %xmm8, %ebp -; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; SSE42-NEXT: andl $1, %esi -; SSE42-NEXT: leal (%r14,%rsi,8), %esi -; SSE42-NEXT: andl $1, %r8d -; SSE42-NEXT: shll $4, %r8d -; SSE42-NEXT: orl %esi, %r8d +; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%r8,%rdx,8), %r8d +; SSE42-NEXT: andl $1, %edi +; SSE42-NEXT: shll $4, %edi +; SSE42-NEXT: orl %r8d, %edi +; SSE42-NEXT: andl $1, %r9d +; SSE42-NEXT: shll $5, %r9d +; SSE42-NEXT: orl %edi, %r9d ; SSE42-NEXT: andl $1, %r15d -; SSE42-NEXT: shll $5, %r15d -; SSE42-NEXT: orl %r8d, %r15d +; SSE42-NEXT: shll $6, %r15d ; SSE42-NEXT: andl $1, %r12d -; SSE42-NEXT: shll $6, %r12d -; SSE42-NEXT: andl $1, %edi -; SSE42-NEXT: shll $7, %edi -; SSE42-NEXT: orl %r12d, %edi -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $8, %eax -; SSE42-NEXT: orl %edi, %eax +; SSE42-NEXT: shll $7, %r12d +; SSE42-NEXT: orl %r15d, %r12d +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: orl %r12d, %ecx ; SSE42-NEXT: andl $1, %ebx ; SSE42-NEXT: shll $9, %ebx -; SSE42-NEXT: orl %eax, %ebx +; SSE42-NEXT: orl %ecx, %ebx ; SSE42-NEXT: andl $1, %r13d ; SSE42-NEXT: shll $10, %r13d ; SSE42-NEXT: orl %ebx, %r13d -; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %r13d, %edx +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %r13d, %esi ; SSE42-NEXT: andl $1, %r11d ; SSE42-NEXT: shll $12, %r11d -; SSE42-NEXT: orl %edx, %r11d -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $13, %ecx -; SSE42-NEXT: orl %r11d, %ecx +; SSE42-NEXT: orl %esi, %r11d +; SSE42-NEXT: andl $1, %eax +; SSE42-NEXT: shll $13, %eax +; SSE42-NEXT: orl %r11d, %eax ; SSE42-NEXT: andl $1, %r10d ; SSE42-NEXT: shll $14, %r10d -; SSE42-NEXT: orl %ecx, %r10d +; SSE42-NEXT: orl %eax, %r10d ; SSE42-NEXT: shll $15, %ebp ; SSE42-NEXT: orl %r10d, %ebp -; SSE42-NEXT: orl %r15d, %ebp -; SSE42-NEXT: movw %bp, (%r9) -; SSE42-NEXT: movq %r9, %rax +; SSE42-NEXT: orl %r9d, %ebp +; SSE42-NEXT: movw %bp, (%r14) +; SSE42-NEXT: movq %r14, %rax ; SSE42-NEXT: popq %rbx ; SSE42-NEXT: popq %r12 ; SSE42-NEXT: popq %r13 @@ -6943,6 +6943,7 @@ define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i64: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0] ; SSE2-NEXT: pxor %xmm8, %xmm7 ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 @@ -7171,130 +7172,130 @@ ; SSE2-NEXT: pand %xmm9, %xmm3 ; SSE2-NEXT: packuswb %xmm1, %xmm3 ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rax) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rax) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i64: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm15 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm13 @@ -7319,125 +7320,124 @@ ; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm13 ; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm15 -; SSE42-NEXT: pextrb $8, %xmm15, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm15, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $0, %xmm14, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $8, %xmm14, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm13, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $8, %xmm13, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $0, %xmm12, %ecx +; SSE42-NEXT: pextrb $8, %xmm15, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $8, %xmm12, %edx +; SSE42-NEXT: pextrb $0, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm11, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $0, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm10, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm10, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $8, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm9, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm9, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm13, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm8, %ecx +; SSE42-NEXT: pextrb $8, %xmm13, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm8, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm12, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm4, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $8, %xmm12, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm11, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm5, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm11, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm10, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm6, %ecx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm10, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm9, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm9, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm8, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm8, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rax) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm6, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $0, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: pextrb $8, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $8, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm6, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rax) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i64: Index: test/CodeGen/X86/vector-interleave.ll =================================================================== --- test/CodeGen/X86/vector-interleave.ll +++ test/CodeGen/X86/vector-interleave.ll @@ -10,6 +10,7 @@ define <64 x i16> @interleave8x8(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e, <8 x i16> %f, <8 x i16> %h, <8 x i16> %g) { ; SSE-LABEL: interleave8x8: ; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: movdqa %xmm0, %xmm8 ; SSE-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3] ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] @@ -46,15 +47,14 @@ ; SSE-NEXT: movdqa %xmm3, %xmm4 ; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3] ; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm6[4],xmm3[5],xmm6[5],xmm3[6],xmm6[6],xmm3[7],xmm6[7] -; SSE-NEXT: movdqa %xmm3, 112(%rdi) -; SSE-NEXT: movdqa %xmm4, 96(%rdi) -; SSE-NEXT: movdqa %xmm0, 80(%rdi) -; SSE-NEXT: movdqa %xmm7, 64(%rdi) -; SSE-NEXT: movdqa %xmm2, 48(%rdi) -; SSE-NEXT: movdqa %xmm1, 32(%rdi) -; SSE-NEXT: movdqa %xmm8, 16(%rdi) -; SSE-NEXT: movdqa %xmm5, (%rdi) -; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movdqa %xmm3, 112(%rax) +; SSE-NEXT: movdqa %xmm4, 96(%rax) +; SSE-NEXT: movdqa %xmm0, 80(%rax) +; SSE-NEXT: movdqa %xmm7, 64(%rax) +; SSE-NEXT: movdqa %xmm2, 48(%rax) +; SSE-NEXT: movdqa %xmm1, 32(%rax) +; SSE-NEXT: movdqa %xmm8, 16(%rax) +; SSE-NEXT: movdqa %xmm5, (%rax) ; SSE-NEXT: retq ; ; AVX1-LABEL: interleave8x8: Index: test/CodeGen/X86/vector-pcmp.ll =================================================================== --- test/CodeGen/X86/vector-pcmp.ll +++ test/CodeGen/X86/vector-pcmp.ll @@ -86,10 +86,10 @@ define <1 x i128> @test_strange_type(<1 x i128> %x) { ; CHECK-LABEL: test_strange_type: ; CHECK: # %bb.0: -; CHECK-NEXT: sarq $63, %rsi -; CHECK-NEXT: notq %rsi ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: sarq $63, %rax +; CHECK-NEXT: notq %rax +; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: retq %sign = ashr <1 x i128> %x, %not = xor <1 x i128> %sign, Index: test/CodeGen/X86/vector-rotate-128.ll =================================================================== --- test/CodeGen/X86/vector-rotate-128.ll +++ test/CodeGen/X86/vector-rotate-128.ll @@ -351,60 +351,61 @@ ; ; SSE41-LABEL: var_rotate_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psubw %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm4 +; SSE41-NEXT: psllw $4, %xmm3 +; SSE41-NEXT: por %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm4 ; SSE41-NEXT: paddw %xmm4, %xmm4 -; SSE41-NEXT: movdqa %xmm3, %xmm6 +; SSE41-NEXT: movdqa %xmm1, %xmm6 ; SSE41-NEXT: psllw $8, %xmm6 -; SSE41-NEXT: movdqa %xmm3, %xmm5 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm6, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $4, %xmm3 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $2, %xmm3 ; SSE41-NEXT: paddw %xmm4, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: psllw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 +; SSE41-NEXT: movdqa %xmm5, %xmm3 +; SSE41-NEXT: psllw $1, %xmm3 ; SSE41-NEXT: paddw %xmm4, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 ; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: por %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 ; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $4, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $2, %xmm2 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: paddw %xmm3, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $1, %xmm2 -; SSE41-NEXT: paddw %xmm1, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm3 -; SSE41-NEXT: por %xmm5, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: por %xmm5, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_rotate_v8i16: Index: test/CodeGen/X86/vector-shift-ashr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-128.ll +++ test/CodeGen/X86/vector-shift-ashr-128.ll @@ -267,32 +267,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psraw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shift-lshr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-128.ll +++ test/CodeGen/X86/vector-shift-lshr-128.ll @@ -237,32 +237,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shift-shl-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-shl-128.ll +++ test/CodeGen/X86/vector-shift-shl-128.ll @@ -194,32 +194,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: paddw %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psllw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psllw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psllw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -164,8 +164,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: shuffle_v2f64_22: ; SSE2: # %bb.0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_22: @@ -193,8 +193,8 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_32: ; SSE: # %bb.0: -; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0] ; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_32: @@ -208,8 +208,8 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_33: ; SSE: # %bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_33: @@ -329,8 +329,8 @@ define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_3u: ; SSE: # %bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_3u: @@ -357,8 +357,8 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_02_copy: ; SSE: # %bb.0: -; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_02_copy: @@ -402,26 +402,26 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_03_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_03_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_03_copy: @@ -464,26 +464,26 @@ define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_12_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_12_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_12_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_12_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_12_copy: @@ -509,8 +509,8 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_13_copy: ; SSE: # %bb.0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_13_copy: @@ -537,8 +537,8 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_20_copy: ; SSE: # %bb.0: -; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_20_copy: @@ -579,26 +579,26 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_21_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_21_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_21_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSSE3-NEXT: movapd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_21_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_21_copy: @@ -641,26 +641,26 @@ define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_30_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_30_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_30_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_30_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_30_copy: @@ -687,8 +687,8 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_31_copy: ; SSE: # %bb.0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_31_copy: Index: test/CodeGen/X86/vector-shuffle-combining-sse4a.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-sse4a.ll +++ test/CodeGen/X86/vector-shuffle-combining-sse4a.ll @@ -33,8 +33,8 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_16i8: ; SSSE3: # %bb.0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_16i8: @@ -54,8 +54,8 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_8i16: ; SSSE3: # %bb.0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_8i16: Index: test/CodeGen/X86/vector-shuffle-combining-ssse3.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -606,8 +606,8 @@ define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: combine_unpckl_arg1_pshufb: ; SSE: # %bb.0: -; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero ; SSE-NEXT: retq ; ; AVX-LABEL: combine_unpckl_arg1_pshufb: Index: test/CodeGen/X86/vector-shuffle-combining.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining.ll +++ test/CodeGen/X86/vector-shuffle-combining.ll @@ -1624,8 +1624,8 @@ define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test1b: ; SSE: # %bb.0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test1b: @@ -1640,8 +1640,8 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: combine_test2b: ; SSE2: # %bb.0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: combine_test2b: @@ -1695,8 +1695,8 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test4b: ; SSE: # %bb.0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test4b: @@ -2766,30 +2766,30 @@ define <8 x float> @PR22412(<8 x float> %a, <8 x float> %b) { ; SSE2-LABEL: PR22412: ; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm3, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,2] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: PR22412: ; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm3, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 -; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,2] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: PR22412: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2] -; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2] -; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[3,2] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[3,2] +; SSE41-NEXT: movaps %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: PR22412: Index: test/CodeGen/X86/vector-zext.ll =================================================================== --- test/CodeGen/X86/vector-zext.ll +++ test/CodeGen/X86/vector-zext.ll @@ -2123,6 +2123,7 @@ define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) { ; SSE2-LABEL: zext_32i8_to_32i32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2142,19 +2143,19 @@ ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-NEXT: movdqa %xmm1, 112(%rdi) -; SSE2-NEXT: movdqa %xmm4, 96(%rdi) -; SSE2-NEXT: movdqa %xmm6, 80(%rdi) -; SSE2-NEXT: movdqa %xmm7, 64(%rdi) -; SSE2-NEXT: movdqa %xmm0, 48(%rdi) -; SSE2-NEXT: movdqa %xmm5, 32(%rdi) -; SSE2-NEXT: movdqa %xmm3, 16(%rdi) -; SSE2-NEXT: movdqa %xmm8, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movdqa %xmm1, 112(%rax) +; SSE2-NEXT: movdqa %xmm4, 96(%rax) +; SSE2-NEXT: movdqa %xmm6, 80(%rax) +; SSE2-NEXT: movdqa %xmm7, 64(%rax) +; SSE2-NEXT: movdqa %xmm0, 48(%rax) +; SSE2-NEXT: movdqa %xmm5, 32(%rax) +; SSE2-NEXT: movdqa %xmm3, 16(%rax) +; SSE2-NEXT: movdqa %xmm8, (%rax) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: zext_32i8_to_32i32: ; SSSE3: # %bb.0: +; SSSE3-NEXT: movq %rdi, %rax ; SSSE3-NEXT: pxor %xmm2, %xmm2 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2174,19 +2175,19 @@ ; SSSE3-NEXT: movdqa %xmm1, %xmm4 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSSE3-NEXT: movdqa %xmm1, 112(%rdi) -; SSSE3-NEXT: movdqa %xmm4, 96(%rdi) -; SSSE3-NEXT: movdqa %xmm6, 80(%rdi) -; SSSE3-NEXT: movdqa %xmm7, 64(%rdi) -; SSSE3-NEXT: movdqa %xmm0, 48(%rdi) -; SSSE3-NEXT: movdqa %xmm5, 32(%rdi) -; SSSE3-NEXT: movdqa %xmm3, 16(%rdi) -; SSSE3-NEXT: movdqa %xmm8, (%rdi) -; SSSE3-NEXT: movq %rdi, %rax +; SSSE3-NEXT: movdqa %xmm1, 112(%rax) +; SSSE3-NEXT: movdqa %xmm4, 96(%rax) +; SSSE3-NEXT: movdqa %xmm6, 80(%rax) +; SSSE3-NEXT: movdqa %xmm7, 64(%rax) +; SSSE3-NEXT: movdqa %xmm0, 48(%rax) +; SSSE3-NEXT: movdqa %xmm5, 32(%rax) +; SSSE3-NEXT: movdqa %xmm3, 16(%rax) +; SSSE3-NEXT: movdqa %xmm8, (%rax) ; SSSE3-NEXT: retq ; ; SSE41-LABEL: zext_32i8_to_32i32: ; SSE41: # %bb.0: +; SSE41-NEXT: movq %rdi, %rax ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero @@ -2201,15 +2202,14 @@ ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; SSE41-NEXT: movdqa %xmm1, 112(%rdi) -; SSE41-NEXT: movdqa %xmm7, 96(%rdi) -; SSE41-NEXT: movdqa %xmm6, 80(%rdi) -; SSE41-NEXT: movdqa %xmm5, 64(%rdi) -; SSE41-NEXT: movdqa %xmm0, 48(%rdi) -; SSE41-NEXT: movdqa %xmm4, 32(%rdi) -; SSE41-NEXT: movdqa %xmm3, 16(%rdi) -; SSE41-NEXT: movdqa %xmm2, (%rdi) -; SSE41-NEXT: movq %rdi, %rax +; SSE41-NEXT: movdqa %xmm1, 112(%rax) +; SSE41-NEXT: movdqa %xmm7, 96(%rax) +; SSE41-NEXT: movdqa %xmm6, 80(%rax) +; SSE41-NEXT: movdqa %xmm5, 64(%rax) +; SSE41-NEXT: movdqa %xmm0, 48(%rax) +; SSE41-NEXT: movdqa %xmm4, 32(%rax) +; SSE41-NEXT: movdqa %xmm3, 16(%rax) +; SSE41-NEXT: movdqa %xmm2, (%rax) ; SSE41-NEXT: retq ; ; AVX1-LABEL: zext_32i8_to_32i32: Index: test/CodeGen/X86/vectorcall.ll =================================================================== --- test/CodeGen/X86/vectorcall.ll +++ test/CodeGen/X86/vectorcall.ll @@ -22,7 +22,8 @@ } ; X86-LABEL: {{^}}test_int_3@@8: ; X64-LABEL: {{^}}test_int_3@@8: -; CHECK: movl %ecx, %eax +; X86: movl %ecx, %eax +; X64: movq %rcx, %rax define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) { %s = add i32 %a, %b @@ -148,8 +149,8 @@ ret <4 x float> %0 } ; CHECK-LABEL: test_mixed_5 -; CHECK: movaps %xmm5, 16(%{{(e|r)}}sp) -; CHECK: movaps %xmm5, %xmm0 +; CHECK-DAG: movaps %xmm{{[0,5]}}, 16(%{{(e|r)}}sp) +; CHECK-DAG: movaps %xmm5, %xmm0 ; CHECK: ret{{[ql]}} define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) { @@ -183,12 +184,12 @@ ret void } ; CHECK-LABEL: test_mixed_7 -; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}}) -; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}}) ; X64: mov{{[ql]}} %rcx, %rax +; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rax|eax}}) +; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rax|eax}}) +; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rax|eax}}) +; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rax|eax}}) +; CHECK: movaps %xmm{{[0-9]}}, (%{{rax|eax}}) ; CHECK: ret{{[ql]}} define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) { Index: test/CodeGen/X86/vselect-minmax.ll =================================================================== --- test/CodeGen/X86/vselect-minmax.ll +++ test/CodeGen/X86/vselect-minmax.ll @@ -4535,23 +4535,24 @@ ; ; SSE4-LABEL: test121: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm5, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm6, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: movdqa %xmm8, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test121: @@ -4655,23 +4656,24 @@ ; ; SSE4-LABEL: test122: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm5, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm6, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: movdqa %xmm8, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test122: @@ -4775,9 +4777,10 @@ ; ; SSE4-LABEL: test123: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -4785,12 +4788,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test123: @@ -4894,9 +4897,10 @@ ; ; SSE4-LABEL: test124: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -4904,12 +4908,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test124: @@ -5013,36 +5017,39 @@ ; ; SSE4-LABEL: test125: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm9, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test125: @@ -5160,36 +5167,39 @@ ; ; SSE4-LABEL: test126: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm9, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test126: @@ -5307,35 +5317,38 @@ ; ; SSE4-LABEL: test127: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm4, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm5, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: pxor %xmm7, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm6, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm1 ; SSE4-NEXT: movdqa %xmm2, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm3, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm5, %xmm0 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test127: @@ -5453,35 +5466,38 @@ ; ; SSE4-LABEL: test128: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm4, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm5, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: pxor %xmm7, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm6, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm1 ; SSE4-NEXT: movdqa %xmm2, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm3, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm5, %xmm0 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test128: @@ -6977,9 +6993,10 @@ ; ; SSE4-LABEL: test153: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -6987,12 +7004,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test153: @@ -7096,9 +7113,10 @@ ; ; SSE4-LABEL: test154: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -7106,12 +7124,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test154: @@ -7215,23 +7233,24 @@ ; ; SSE4-LABEL: test155: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm5, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm6, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: movdqa %xmm8, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test155: @@ -7335,35 +7354,38 @@ ; ; SSE4-LABEL: test156: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm4, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm5, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: pxor %xmm7, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm6, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm1 ; SSE4-NEXT: movdqa %xmm2, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm3, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm5, %xmm0 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test156: @@ -7481,36 +7503,39 @@ ; ; SSE4-LABEL: test159: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm9, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test159: @@ -7628,36 +7653,39 @@ ; ; SSE4-LABEL: test160: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm9, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm5, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test160: Index: test/CodeGen/X86/vselect.ll =================================================================== --- test/CodeGen/X86/vselect.ll +++ test/CodeGen/X86/vselect.ll @@ -457,25 +457,25 @@ define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) { ; SSE-LABEL: select_illegal: ; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm7 -; SSE-NEXT: movaps %xmm7, 112(%rdi) -; SSE-NEXT: movaps %xmm6, 96(%rdi) -; SSE-NEXT: movaps %xmm5, 80(%rdi) -; SSE-NEXT: movaps %xmm4, 64(%rdi) -; SSE-NEXT: movaps %xmm3, 48(%rdi) -; SSE-NEXT: movaps %xmm2, 32(%rdi) -; SSE-NEXT: movaps %xmm1, 16(%rdi) -; SSE-NEXT: movaps %xmm0, (%rdi) -; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movaps %xmm7, 112(%rax) +; SSE-NEXT: movaps %xmm6, 96(%rax) +; SSE-NEXT: movaps %xmm5, 80(%rax) +; SSE-NEXT: movaps %xmm4, 64(%rax) +; SSE-NEXT: movaps %xmm3, 48(%rax) +; SSE-NEXT: movaps %xmm2, 32(%rax) +; SSE-NEXT: movaps %xmm1, 16(%rax) +; SSE-NEXT: movaps %xmm0, (%rax) ; SSE-NEXT: retq ; ; AVX-LABEL: select_illegal: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: vmovaps %ymm7, %ymm3 +; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: retq %sel = select <16 x i1> , <16 x double> %a, <16 x double> %b ret <16 x double> %sel Index: test/CodeGen/X86/widen_bitops-0.ll =================================================================== --- test/CodeGen/X86/widen_bitops-0.ll +++ test/CodeGen/X86/widen_bitops-0.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v3i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v3i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v3i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v8i3: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v8i3: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v8i3: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> Index: test/CodeGen/X86/widen_bitops-1.ll =================================================================== --- test/CodeGen/X86/widen_bitops-1.ll +++ test/CodeGen/X86/widen_bitops-1.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v4i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v4i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v4i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v8i4: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v8i4: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v8i4: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> Index: test/CodeGen/X86/widen_load-2.ll =================================================================== --- test/CodeGen/X86/widen_load-2.ll +++ test/CodeGen/X86/widen_load-2.ll @@ -21,11 +21,11 @@ ; ; X64-LABEL: add3i32: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: paddd (%rdx), %xmm0 -; X64-NEXT: pextrd $2, %xmm0, 8(%rdi) -; X64-NEXT: movq %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm0, 8(%rax) +; X64-NEXT: movq %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 16 %b = load %i32vec3, %i32vec3* %bp, align 16 @@ -54,14 +54,14 @@ ; ; X64-LABEL: add3i32_2: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: pinsrd $2, 8(%rsi), %xmm0 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: pinsrd $2, 8(%rdx), %xmm1 ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrd $2, %xmm1, 8(%rdi) -; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm1, 8(%rax) +; X64-NEXT: movq %xmm1, (%rax) ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 8 %b = load %i32vec3, %i32vec3* %bp, align 8 @@ -89,14 +89,14 @@ ; ; X64-LABEL: add7i32: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddd (%rdx), %xmm0 ; X64-NEXT: paddd 16(%rdx), %xmm1 -; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrd $2, %xmm1, 24(%rax) +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec7, %i32vec7* %ap, align 16 %b = load %i32vec7, %i32vec7* %bp, align 16 @@ -125,16 +125,16 @@ ; ; X64-LABEL: add12i32: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 ; X64-NEXT: paddd (%rdx), %xmm0 ; X64-NEXT: paddd 16(%rdx), %xmm1 ; X64-NEXT: paddd 32(%rdx), %xmm2 -; X64-NEXT: movdqa %xmm2, 32(%rdi) -; X64-NEXT: movdqa %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movdqa %xmm2, 32(%rax) +; X64-NEXT: movdqa %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i32vec12, %i32vec12* %ap, align 16 %b = load %i32vec12, %i32vec12* %bp, align 16 @@ -171,13 +171,13 @@ ; ; X64-LABEL: add3i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrw $4, %xmm1, 4(%rdi) +; X64-NEXT: pextrw $4, %xmm1, 4(%rax) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; X64-NEXT: movd %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movd %xmm1, (%rax) ; X64-NEXT: retq %a = load %i16vec3, %i16vec3* %ap, align 16 %b = load %i16vec3, %i16vec3* %bp, align 16 @@ -201,11 +201,11 @@ ; ; X64-LABEL: add4i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: paddw %xmm0, %xmm1 -; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %xmm1, (%rax) ; X64-NEXT: retq %a = load %i16vec4, %i16vec4* %ap, align 16 %b = load %i16vec4, %i16vec4* %bp, align 16 @@ -232,13 +232,13 @@ ; ; X64-LABEL: add12i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddw (%rdx), %xmm0 ; X64-NEXT: paddw 16(%rdx), %xmm1 -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i16vec12, %i16vec12* %ap, align 16 %b = load %i16vec12, %i16vec12* %bp, align 16 @@ -267,16 +267,16 @@ ; ; X64-LABEL: add18i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 ; X64-NEXT: paddw (%rdx), %xmm0 ; X64-NEXT: paddw 16(%rdx), %xmm1 ; X64-NEXT: paddw 32(%rdx), %xmm2 -; X64-NEXT: movd %xmm2, 32(%rdi) -; X64-NEXT: movdqa %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movd %xmm2, 32(%rax) +; X64-NEXT: movdqa %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i16vec18, %i16vec18* %ap, align 16 %b = load %i16vec18, %i16vec18* %bp, align 16 @@ -305,13 +305,13 @@ ; ; X64-LABEL: add3i8: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: paddd %xmm0, %xmm1 -; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) +; X64-NEXT: pextrb $8, %xmm1, 2(%rax) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-NEXT: pextrw $0, %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrw $0, %xmm1, (%rax) ; X64-NEXT: retq %a = load %i8vec3, %i8vec3* %ap, align 16 %b = load %i8vec3, %i8vec3* %bp, align 16 @@ -341,16 +341,16 @@ ; ; X64-LABEL: add31i8: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddb (%rdx), %xmm0 ; X64-NEXT: paddb 16(%rdx), %xmm1 -; X64-NEXT: pextrb $14, %xmm1, 30(%rdi) -; X64-NEXT: pextrw $6, %xmm1, 28(%rdi) -; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) -; X64-NEXT: movq %xmm1, 16(%rdi) -; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrb $14, %xmm1, 30(%rax) +; X64-NEXT: pextrw $6, %xmm1, 28(%rax) +; X64-NEXT: pextrd $2, %xmm1, 24(%rax) +; X64-NEXT: movq %xmm1, 16(%rax) +; X64-NEXT: movdqa %xmm0, (%rax) ; X64-NEXT: retq %a = load %i8vec31, %i8vec31* %ap, align 16 %b = load %i8vec31, %i8vec31* %bp, align 16 @@ -384,6 +384,7 @@ ; ; X64-LABEL: rot: ; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movb $-98, 2(%rsi) ; X64-NEXT: movw $-24930, (%rsi) # imm = 0x9E9E ; X64-NEXT: movb $1, 2(%rdx) @@ -393,9 +394,8 @@ ; X64-NEXT: psrld $1, %xmm1 ; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] ; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) -; X64-NEXT: pextrw $0, %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: pextrb $8, %xmm1, 2(%rax) +; X64-NEXT: pextrw $0, %xmm0, (%rax) ; X64-NEXT: retq entry: %storetmp = bitcast %i8vec3pack* %X to <3 x i8>* Index: test/CodeGen/X86/widen_load-3.ll =================================================================== --- test/CodeGen/X86/widen_load-3.ll +++ test/CodeGen/X86/widen_load-3.ll @@ -41,26 +41,26 @@ ; ; X64-SSE-LABEL: load7_aligned: ; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movaps (%rsi), %xmm0 ; X64-SSE-NEXT: movaps 16(%rsi), %xmm1 ; X64-SSE-NEXT: movaps 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) -; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) -; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax +; X64-SSE-NEXT: movq 48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rax) +; X64-SSE-NEXT: movaps %xmm2, 32(%rax) +; X64-SSE-NEXT: movaps %xmm1, 16(%rax) +; X64-SSE-NEXT: movaps %xmm0, (%rax) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_aligned: ; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovaps (%rsi), %ymm0 ; X64-AVX-NEXT: vmovaps 32(%rsi), %ymm1 -; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) +; X64-AVX-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0 -; X64-AVX-NEXT: vmovlps %xmm0, 48(%rdi) -; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) -; X64-AVX-NEXT: movq %rdi, %rax +; X64-AVX-NEXT: vmovlps %xmm0, 48(%rax) +; X64-AVX-NEXT: vmovaps %xmm1, 32(%rax) ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x @@ -101,26 +101,26 @@ ; ; X64-SSE-LABEL: load7_unaligned: ; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movups (%rsi), %xmm0 ; X64-SSE-NEXT: movups 16(%rsi), %xmm1 ; X64-SSE-NEXT: movups 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) -; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) -; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax +; X64-SSE-NEXT: movq 48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rax) +; X64-SSE-NEXT: movaps %xmm2, 32(%rax) +; X64-SSE-NEXT: movaps %xmm1, 16(%rax) +; X64-SSE-NEXT: movaps %xmm0, (%rax) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_unaligned: ; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovups (%rsi), %ymm0 ; X64-AVX-NEXT: vmovups 32(%rsi), %xmm1 -; X64-AVX-NEXT: movq 48(%rsi), %rax -; X64-AVX-NEXT: movq %rax, 48(%rdi) -; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) -; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) -; X64-AVX-NEXT: movq %rdi, %rax +; X64-AVX-NEXT: movq 48(%rsi), %rcx +; X64-AVX-NEXT: movq %rcx, 48(%rax) +; X64-AVX-NEXT: vmovaps %xmm1, 32(%rax) +; X64-AVX-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x, align 1 Index: test/CodeGen/X86/win64_vararg.ll =================================================================== --- test/CodeGen/X86/win64_vararg.ll +++ test/CodeGen/X86/win64_vararg.ll @@ -121,10 +121,10 @@ } ; CHECK-LABEL: sret_arg: ; CHECK: pushq +; CHECK: movq %rcx, %rax ; CHECK-DAG: movq %r9, 40(%rsp) ; CHECK-DAG: movq %r8, 32(%rsp) ; CHECK: movl 32(%rsp), %[[tmp:[^ ]*]] -; CHECK: movl %[[tmp]], (%[[sret:[^ ]*]]) -; CHECK: movq %[[sret]], %rax +; CHECK: movl %[[tmp]], (%rax) ; CHECK: popq ; CHECK: retq Index: test/CodeGen/X86/x64-cet-intrinsics.ll =================================================================== --- test/CodeGen/X86/x64-cet-intrinsics.ll +++ test/CodeGen/X86/x64-cet-intrinsics.ll @@ -30,8 +30,8 @@ define i32 @test_rdsspd(i32 %a) { ; CHECK-LABEL: test_rdsspd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: rdsspd %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: rdsspd %eax ; CHECK-NEXT: retq entry: %0 = call i32 @llvm.x86.rdsspd(i32 %a) @@ -43,8 +43,8 @@ define i64 @test_rdsspq(i64 %a) { ; CHECK-LABEL: test_rdsspq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: rdsspq %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: rdsspq %rax ; CHECK-NEXT: retq entry: %0 = call i64 @llvm.x86.rdsspq(i64 %a) Index: test/CodeGen/X86/x86-64-bittest-logic.ll =================================================================== --- test/CodeGen/X86/x86-64-bittest-logic.ll +++ test/CodeGen/X86/x86-64-bittest-logic.ll @@ -124,8 +124,8 @@ define i64 @and1_optsize(i64 %x) optsize { ; CHECK-LABEL: and1_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $31, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $31, %rax ; CHECK-NEXT: retq %a = and i64 %x, 18446744071562067967 ; clear bit 31 ret i64 %a @@ -134,8 +134,8 @@ define i64 @and2_optsize(i64 %x) optsize { ; CHECK-LABEL: and2_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $32, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $32, %rax ; CHECK-NEXT: retq %a = and i64 %x, 18446744069414584319 ; clear bit 32 ret i64 %a @@ -144,8 +144,8 @@ define i64 @and3_optsize(i64 %x) optsize { ; CHECK-LABEL: and3_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $62, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $62, %rax ; CHECK-NEXT: retq %a = and i64 %x, 13835058055282163711 ; clear bit 62 ret i64 %a @@ -154,8 +154,8 @@ define i64 @and4_optsize(i64 %x) optsize { ; CHECK-LABEL: and4_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $63, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $63, %rax ; CHECK-NEXT: retq %a = and i64 %x, 9223372036854775807 ; clear bit 63 ret i64 %a @@ -164,8 +164,8 @@ define i64 @or1_optsize(i64 %x) optsize { ; CHECK-LABEL: or1_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $31, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $31, %rax ; CHECK-NEXT: retq %a = or i64 %x, 2147483648 ; set bit 31 ret i64 %a @@ -174,8 +174,8 @@ define i64 @or2_optsize(i64 %x) optsize { ; CHECK-LABEL: or2_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $32, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $32, %rax ; CHECK-NEXT: retq %a = or i64 %x, 4294967296 ; set bit 32 ret i64 %a @@ -184,8 +184,8 @@ define i64 @or3_optsize(i64 %x) optsize { ; CHECK-LABEL: or3_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $62, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $62, %rax ; CHECK-NEXT: retq %a = or i64 %x, 4611686018427387904 ; set bit 62 ret i64 %a @@ -194,8 +194,8 @@ define i64 @or4_optsize(i64 %x) optsize { ; CHECK-LABEL: or4_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $63, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $63, %rax ; CHECK-NEXT: retq %a = or i64 %x, 9223372036854775808 ; set bit 63 ret i64 %a @@ -204,8 +204,8 @@ define i64 @xor1_optsize(i64 %x) optsize { ; CHECK-LABEL: xor1_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $31, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $31, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 2147483648 ; toggle bit 31 ret i64 %a @@ -214,8 +214,8 @@ define i64 @xor2_optsize(i64 %x) optsize { ; CHECK-LABEL: xor2_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $32, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $32, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 4294967296 ; toggle bit 32 ret i64 %a @@ -224,8 +224,8 @@ define i64 @xor3_optsize(i64 %x) optsize { ; CHECK-LABEL: xor3_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $62, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $62, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 4611686018427387904 ; toggle bit 62 ret i64 %a @@ -234,8 +234,8 @@ define i64 @xor4_optsize(i64 %x) optsize { ; CHECK-LABEL: xor4_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $63, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $63, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 9223372036854775808 ; toggle bit 63 ret i64 %a Index: test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll =================================================================== --- test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll +++ test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll @@ -14,8 +14,8 @@ define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 { ; CHECK-LABEL: _Z8lshift10mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shldq $10, %rsi, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shldq $10, %rsi, %rax ; CHECK-NEXT: retq entry: %shl = shl i64 %a, 10 @@ -40,8 +40,8 @@ define i64 @_Z8lshift11mm(i64 %a, i64 %b) #1 { ; CHECK-LABEL: _Z8lshift11mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shldq $11, %rsi, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shldq $11, %rsi, %rax ; CHECK-NEXT: retq entry: %shl = shl i64 %a, 11 Index: test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- test/CodeGen/X86/x86-cmov-converter.ll +++ test/CodeGen/X86/x86-cmov-converter.ll @@ -336,14 +336,14 @@ ; CHECK-LABEL: test_cmov_memoperand: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %load = load i32, i32* %y %z = select i1 %cond, i32 %x, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%rcx), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], % ret i32 %z } @@ -353,6 +353,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %y = load i32, i32* %y.ptr %z1 = select i1 %cond, i32 %x, i32 %a @@ -362,17 +363,16 @@ ; CHECK: ja [[FALSE_BB:.*]] ; CHECK-DAG: movl %{{.*}}, %[[R1:.*]] ; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]] -; CHECK-DAG: movl %{{.*}} %[[R3:.*]] +; CHECK-DAG: movl %{{.*}} %eax ; CHECK: [[FALSE_BB]]: ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -384,6 +384,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %y = load i32, i32* %y.ptr %z2 = select i1 %cond, i32 %a, i32 %x @@ -393,17 +394,16 @@ ; CHECK: jbe [[FALSE_BB:.*]] ; CHECK-DAG: movl %{{.*}}, %[[R1:.*]] ; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]] -; CHECK-DAG: movl %{{.*}} %[[R3:.*]] +; CHECK-DAG: movl %{{.*}} %eax ; CHECK: [[FALSE_BB]]: ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -434,15 +434,15 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %load = load i32, i32* %p %z = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %z } @@ -453,6 +453,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %load1 = load i32*, i32** %y %p = select i1 %cond, i32* %x, i32* %load1 @@ -461,9 +462,8 @@ ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] ; CHECK: movq (%r{{..}}), %[[R1:.*]] -; CHECK: movl (%[[R1]]), %[[R2:.*]] +; CHECK: movl (%[[R1]]), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R2]], %eax ; CHECK: retq ret i32 %z } @@ -475,6 +475,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %p2 = select i1 %cond, i32* %z, i32* %p @@ -482,9 +483,8 @@ %r = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %r } Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ test/CodeGen/X86/x86-shrink-wrapping.ll @@ -72,6 +72,7 @@ ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: ; ; Shrink-wrapping allows to skip the prologue in the else case. +; ENABLE: movl %esi, %eax ; ENABLE: testl %edi, %edi ; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]] ; @@ -79,12 +80,11 @@ ; Make sure we save the CSR used in the inline asm: rbx. ; CHECK: pushq %rbx ; +; DISABLE: movl %esi, %eax ; DISABLE: testl %edi, %edi ; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]] ; -; SUM is in %esi because it is coalesced with the second -; argument on the else path. -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; Next BB. @@ -98,23 +98,20 @@ ; SUM << 3. ; CHECK: shll $3, [[SUM]] ; -; Jump to epilogue. -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { entry: @@ -198,6 +195,7 @@ ; restore outside. ; CHECK-LABEL: loopInfoSaveOutsideLoop: ; +; ENABLE: movl %esi, %eax ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; @@ -205,11 +203,12 @@ ; Make sure we save the CSR used in the inline asm: rbx. ; CHECK: pushq %rbx ; +; DISABLE: movl %esi, %eax ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -221,22 +220,20 @@ ; CHECK: nop ; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { entry: @@ -274,6 +271,7 @@ ; save outside. ; CHECK-LABEL: loopInfoRestoreOutsideLoop: ; +; ENABLE: movl %esi, %eax ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; @@ -281,11 +279,12 @@ ; Make sure we save the CSR used in the inline asm: rbx. ; CHECK: pushq %rbx ; +; DISABLE: movl %esi, %eax ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -296,23 +295,21 @@ ; Next BB. ; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { entry: @@ -358,6 +355,7 @@ ; Check that we handle inline asm correctly. ; CHECK-LABEL: inlineAsm: ; +; ENABLE: movl %esi, %eax ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; @@ -365,6 +363,7 @@ ; Make sure we save the CSR used in the inline asm: rbx. ; CHECK: pushq %rbx ; +; DISABLE: movl %esi, %eax ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; @@ -378,24 +377,22 @@ ; CHECK-NEXT: jne [[LOOP_LABEL]] ; Next BB. ; CHECK: nop -; CHECK: xorl %esi, %esi +; CHECK: xorl %eax, %eax ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: addl %eax, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: addl %eax, %eax ; ENABLE-NEXT: retq define i32 @inlineAsm(i32 %cond, i32 %N) { entry: Index: test/CodeGen/X86/xaluo.ll =================================================================== --- test/CodeGen/X86/xaluo.ll +++ test/CodeGen/X86/xaluo.ll @@ -719,26 +719,26 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: saddoselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: addl %esi, %eax -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: addl %eax, %ecx +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: addl %esi, %eax -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: addl %eax, %ecx +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: saddoselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: addl %esi, %eax -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: addl %eax, %ecx +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -749,26 +749,26 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: saddoselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: addq %rsi, %rax -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: addq %rax, %rcx +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: addq %rsi, %rax -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: addq %rax, %rcx +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: saddoselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: addq %rsi, %rax -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: addq %rax, %rcx +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -779,26 +779,26 @@ define i32 @uaddoselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: uaddoselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: addl %esi, %eax -; SDAG-NEXT: cmovbl %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: addl %eax, %ecx +; SDAG-NEXT: cmovbl %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: uaddoselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: addl %esi, %eax -; FAST-NEXT: cmovbl %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: addl %eax, %ecx +; FAST-NEXT: cmovbl %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: uaddoselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: addl %esi, %eax -; KNL-NEXT: cmovbl %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: addl %eax, %ecx +; KNL-NEXT: cmovbl %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -809,26 +809,26 @@ define i64 @uaddoselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: uaddoselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: addq %rsi, %rax -; SDAG-NEXT: cmovbq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: addq %rax, %rcx +; SDAG-NEXT: cmovbq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: uaddoselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: addq %rsi, %rax -; FAST-NEXT: cmovbq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: addq %rax, %rcx +; FAST-NEXT: cmovbq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: uaddoselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: addq %rsi, %rax -; KNL-NEXT: cmovbq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: addq %rax, %rcx +; KNL-NEXT: cmovbq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -839,23 +839,23 @@ define i32 @ssuboselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: ssuboselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpl %esi, %edi -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: cmpl %eax, %edi +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: ssuboselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: cmpl %esi, %edi -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: cmpl %eax, %edi +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: ssuboselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: cmpl %eax, %edi +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -866,23 +866,23 @@ define i64 @ssuboselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: ssuboselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpq %rsi, %rdi -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: cmpq %rax, %rdi +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: ssuboselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: cmpq %rsi, %rdi -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: cmpq %rax, %rdi +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: ssuboselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: cmpq %rsi, %rdi -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: cmpq %rax, %rdi +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -893,23 +893,23 @@ define i32 @usuboselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: usuboselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpl %esi, %edi -; SDAG-NEXT: cmovbl %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: cmpl %eax, %edi +; SDAG-NEXT: cmovbl %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: cmpl %esi, %edi -; FAST-NEXT: cmovbl %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: cmpl %eax, %edi +; FAST-NEXT: cmovbl %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: usuboselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: cmovbl %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: cmpl %eax, %edi +; KNL-NEXT: cmovbl %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -920,23 +920,23 @@ define i64 @usuboselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: usuboselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpq %rsi, %rdi -; SDAG-NEXT: cmovbq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: cmpq %rax, %rdi +; SDAG-NEXT: cmovbq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: cmpq %rsi, %rdi -; FAST-NEXT: cmovbq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: cmpq %rax, %rdi +; FAST-NEXT: cmovbq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: usuboselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: cmpq %rsi, %rdi -; KNL-NEXT: cmovbq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: cmpq %rax, %rdi +; KNL-NEXT: cmovbq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1372,23 +1372,23 @@ define {i64, i1} @usuboovf(i64 %a, i64 %b) { ; SDAG-LABEL: usuboovf: ; SDAG: ## %bb.0: -; SDAG-NEXT: notq %rsi -; SDAG-NEXT: xorl %edx, %edx ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: notq %rax +; SDAG-NEXT: xorl %edx, %edx ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboovf: ; FAST: ## %bb.0: -; FAST-NEXT: notq %rsi -; FAST-NEXT: xorl %edx, %edx ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: notq %rax +; FAST-NEXT: xorl %edx, %edx ; FAST-NEXT: retq ; ; KNL-LABEL: usuboovf: ; KNL: ## %bb.0: -; KNL-NEXT: notq %rsi -; KNL-NEXT: xorl %edx, %edx ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: notq %rax +; KNL-NEXT: xorl %edx, %edx ; KNL-NEXT: retq %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %a) %v0 = extractvalue {i64, i1} %t0, 0 Index: test/CodeGen/X86/xchg-nofold.ll =================================================================== --- test/CodeGen/X86/xchg-nofold.ll +++ test/CodeGen/X86/xchg-nofold.ll @@ -9,20 +9,21 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(%"struct.std::atomic"* nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind { ; CHECK-LABEL: _Z3fooRSt6atomicIbEb: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq $3, %rax -; CHECK-NEXT: movb 2147450880(%rax), %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: shrq $3, %rcx +; CHECK-NEXT: movb 2147450880(%rcx), %cl +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: andl $7, %ecx -; CHECK-NEXT: cmpb %al, %cl +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $7, %edx +; CHECK-NEXT: cmpb %cl, %dl ; CHECK-NEXT: jge .LBB0_2 ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: xchgb %al, (%rdi) -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xchgb %cl, (%rdi) +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: pushq %rax Index: test/CodeGen/X86/xmulo.ll =================================================================== --- test/CodeGen/X86/xmulo.ll +++ test/CodeGen/X86/xmulo.ll @@ -92,6 +92,7 @@ ; SDAG-LABEL: smuloi8: ; SDAG: ## %bb.0: ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: def $al killed $al killed $eax ; SDAG-NEXT: imulb %sil ; SDAG-NEXT: seto %cl ; SDAG-NEXT: movb %al, (%rdx) @@ -101,6 +102,7 @@ ; FAST-LABEL: smuloi8: ; FAST: ## %bb.0: ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: def $al killed $al killed $eax ; FAST-NEXT: imulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -111,6 +113,7 @@ ; KNL-LABEL: smuloi8: ; KNL: ## %bb.0: ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: imulb %sil ; KNL-NEXT: seto %cl ; KNL-NEXT: movb %al, (%rdx) @@ -218,6 +221,7 @@ ; SDAG-LABEL: umuloi8: ; SDAG: ## %bb.0: ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: def $al killed $al killed $eax ; SDAG-NEXT: mulb %sil ; SDAG-NEXT: seto %cl ; SDAG-NEXT: movb %al, (%rdx) @@ -227,6 +231,7 @@ ; FAST-LABEL: umuloi8: ; FAST: ## %bb.0: ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: def $al killed $al killed $eax ; FAST-NEXT: mulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -237,6 +242,7 @@ ; KNL-LABEL: umuloi8: ; KNL: ## %bb.0: ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: mulb %sil ; KNL-NEXT: seto %cl ; KNL-NEXT: movb %al, (%rdx) @@ -254,6 +260,7 @@ ; SDAG: ## %bb.0: ; SDAG-NEXT: movq %rdx, %rcx ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax ; SDAG-NEXT: mulw %si ; SDAG-NEXT: seto %dl ; SDAG-NEXT: movw %ax, (%rcx) @@ -264,6 +271,7 @@ ; FAST: ## %bb.0: ; FAST-NEXT: movq %rdx, %rcx ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: def $ax killed $ax killed $eax ; FAST-NEXT: mulw %si ; FAST-NEXT: seto %dl ; FAST-NEXT: movw %ax, (%rcx) @@ -275,6 +283,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdx, %rcx ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: mulw %si ; KNL-NEXT: seto %dl ; KNL-NEXT: movw %ax, (%rcx) @@ -369,26 +378,26 @@ define i32 @smuloselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: smuloselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: imull %esi, %eax -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: imull %eax, %ecx +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: smuloselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: imull %esi, %eax -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: imull %eax, %ecx +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: smuloselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: imull %esi, %eax -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: imull %eax, %ecx +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -399,26 +408,26 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: smuloselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: imulq %rsi, %rax -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: imulq %rax, %rcx +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: smuloselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: imulq %rsi, %rax -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: imulq %rax, %rcx +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: smuloselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: imulq %rsi, %rax -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: imulq %rax, %rcx +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -694,8 +703,8 @@ define i1 @bug27873(i64 %c1, i1 %c2) { ; SDAG-LABEL: bug27873: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl $160, %ecx ; SDAG-NEXT: movq %rdi, %rax +; SDAG-NEXT: movl $160, %ecx ; SDAG-NEXT: mulq %rcx ; SDAG-NEXT: seto %al ; SDAG-NEXT: orb %sil, %al @@ -703,8 +712,8 @@ ; ; FAST-LABEL: bug27873: ; FAST: ## %bb.0: -; FAST-NEXT: movl $160, %ecx ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: movl $160, %ecx ; FAST-NEXT: mulq %rcx ; FAST-NEXT: seto %al ; FAST-NEXT: orb %sil, %al @@ -712,8 +721,8 @@ ; ; KNL-LABEL: bug27873: ; KNL: ## %bb.0: -; KNL-NEXT: movl $160, %ecx ; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movl $160, %ecx ; KNL-NEXT: mulq %rcx ; KNL-NEXT: seto %al ; KNL-NEXT: orb %sil, %al Index: test/CodeGen/X86/xor.ll =================================================================== --- test/CodeGen/X86/xor.ll +++ test/CodeGen/X86/xor.ll @@ -44,18 +44,18 @@ ; ; X64-LIN-LABEL: test3: ; X64-LIN: # %bb.0: # %entry -; X64-LIN-NEXT: notl %esi -; X64-LIN-NEXT: andl %edi, %esi -; X64-LIN-NEXT: shrl %esi ; X64-LIN-NEXT: movl %esi, %eax +; X64-LIN-NEXT: notl %eax +; X64-LIN-NEXT: andl %edi, %eax +; X64-LIN-NEXT: shrl %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test3: ; X64-WIN: # %bb.0: # %entry -; X64-WIN-NEXT: notl %edx -; X64-WIN-NEXT: andl %ecx, %edx -; X64-WIN-NEXT: shrl %edx ; X64-WIN-NEXT: movl %edx, %eax +; X64-WIN-NEXT: notl %eax +; X64-WIN-NEXT: andl %ecx, %eax +; X64-WIN-NEXT: shrl %eax ; X64-WIN-NEXT: retq entry: %tmp1not = xor i32 %b, -2 @@ -84,34 +84,34 @@ ; ; X64-LIN-LABEL: test4: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB3_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorl %esi, %edi -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: notl %eax -; X64-LIN-NEXT: andl %esi, %eax -; X64-LIN-NEXT: addl %eax, %eax -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorl %esi, %eax +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notl %ecx +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB3_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test4: ; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB3_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorl %edx, %ecx -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: notl %eax -; X64-WIN-NEXT: andl %edx, %eax -; X64-WIN-NEXT: addl %eax, %eax -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorl %edx, %eax +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notl %ecx +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB3_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -150,36 +150,38 @@ ; ; X64-LIN-LABEL: test5: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB4_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorl %esi, %edi -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: notl %eax -; X64-LIN-NEXT: andl %esi, %eax -; X64-LIN-NEXT: addl %eax, %eax -; X64-LIN-NEXT: testw %ax, %ax -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorl %esi, %eax +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notl %ecx +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: testw %cx, %cx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB4_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: # kill: def $ax killed $ax killed $eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test5: ; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB4_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorl %edx, %ecx -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: notl %eax -; X64-WIN-NEXT: andl %edx, %eax -; X64-WIN-NEXT: addl %eax, %eax -; X64-WIN-NEXT: testw %ax, %ax -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorl %edx, %eax +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notl %ecx +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: testw %cx, %cx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB4_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: # kill: def $ax killed $ax killed $eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -216,34 +218,36 @@ ; ; X64-LIN-LABEL: test6: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB5_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorb %sil, %dil -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: notb %al -; X64-LIN-NEXT: andb %sil, %al -; X64-LIN-NEXT: addb %al, %al -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorb %sil, %al +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notb %cl +; X64-LIN-NEXT: andb %sil, %cl +; X64-LIN-NEXT: addb %cl, %cl +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB5_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: # kill: def $al killed $al killed $eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test6: ; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB5_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorb %dl, %cl -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: notb %al -; X64-WIN-NEXT: andb %dl, %al -; X64-WIN-NEXT: addb %al, %al -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorb %dl, %al +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notb %cl +; X64-WIN-NEXT: andb %dl, %cl +; X64-WIN-NEXT: addb %cl, %cl +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB5_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: # kill: def $al killed $al killed $eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -280,34 +284,34 @@ ; ; X64-LIN-LABEL: test7: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB6_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorl %esi, %edi -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: xorl $2147483646, %eax # imm = 0x7FFFFFFE -; X64-LIN-NEXT: andl %esi, %eax -; X64-LIN-NEXT: addl %eax, %eax -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorl %esi, %eax +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB6_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test7: ; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB6_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorl %edx, %ecx -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: xorl $2147483646, %eax # imm = 0x7FFFFFFE -; X64-WIN-NEXT: andl %edx, %eax -; X64-WIN-NEXT: addl %eax, %eax -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorl %edx, %eax +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB6_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -334,14 +338,14 @@ ; ; X64-LIN-LABEL: test8: ; X64-LIN: # %bb.0: # %entry -; X64-LIN-NEXT: notl %edi ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: notl %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test8: ; X64-WIN: # %bb.0: # %entry -; X64-WIN-NEXT: notl %ecx ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: notl %eax ; X64-WIN-NEXT: retq entry: %t1 = sub i32 0, %a @@ -359,16 +363,16 @@ ; ; X64-LIN-LABEL: test9: ; X64-LIN: # %bb.0: -; X64-LIN-NEXT: notl %edi -; X64-LIN-NEXT: andl $4096, %edi # imm = 0x1000 ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: notl %eax +; X64-LIN-NEXT: andl $4096, %eax # imm = 0x1000 ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test9: ; X64-WIN: # %bb.0: -; X64-WIN-NEXT: notl %ecx -; X64-WIN-NEXT: andl $4096, %ecx # imm = 0x1000 ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: notl %eax +; X64-WIN-NEXT: andl $4096, %eax # imm = 0x1000 ; X64-WIN-NEXT: retq %1 = and i32 %a, 4096 %2 = xor i32 %1, 4096 @@ -456,8 +460,9 @@ ; ; X64-LIN-LABEL: test11: ; X64-LIN: # %bb.0: -; X64-LIN-NEXT: movl $-2, %eax ; X64-LIN-NEXT: movl %edi, %ecx +; X64-LIN-NEXT: movl $-2, %eax +; X64-LIN-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-LIN-NEXT: roll %cl, %eax ; X64-LIN-NEXT: retq ; Index: test/DebugInfo/COFF/pieces.ll =================================================================== --- test/DebugInfo/COFF/pieces.ll +++ test/DebugInfo/COFF/pieces.ll @@ -64,15 +64,15 @@ ; ASM-LABEL: pad_right: # @pad_right -; ASM: #DEBUG_VALUE: pad_right:o <- [DW_OP_LLVM_fragment 32 32] $ecx -; ASM: movl %ecx, %eax +; ASM: movq %rcx, %rax +; ASM: #DEBUG_VALUE: pad_right:o <- [DW_OP_LLVM_fragment 32 32] $eax ; ASM: retq ; ASM-LABEL: pad_left: # @pad_left -; ASM: #DEBUG_VALUE: pad_left:o <- [DW_OP_LLVM_fragment 0 32] $ecx ; ASM: .cv_loc 2 1 24 3 # t.c:24:3 -; ASM: movl %ecx, %eax +; ASM: movq %rcx, %rax +; ASM: #DEBUG_VALUE: pad_left:o <- [DW_OP_LLVM_fragment 0 32] $eax ; ASM: retq @@ -133,7 +133,7 @@ ; ASM: .asciz "pad_right" # Function name ; ASM: .short 4414 # Record kind: S_LOCAL ; ASM: .asciz "o" -; ASM: .cv_def_range .Lfunc_begin1 .Lfunc_end1, "C\021\022\000\000\000\004\000\000\000" +; ASM: .cv_def_range .Lfunc_begin1 .Ltmp8, "C\021\021\000\000\000\004\000\000\000" ; OBJ-LABEL: {{.*}}Proc{{.*}}Sym { ; OBJ: Kind: S_GPROC32_ID (0x1147) @@ -143,7 +143,7 @@ ; OBJ: VarName: o ; OBJ: } ; OBJ: DefRangeSubfieldRegisterSym { -; OBJ: Register: ECX (0x12) +; OBJ: Register: EAX (0x11) ; OBJ: MayHaveNoName: 0 ; OBJ: OffsetInParent: 4 ; OBJ: LocalVariableAddrRange { @@ -156,7 +156,7 @@ ; ASM: .asciz "pad_left" # Function name ; ASM: .short 4414 # Record kind: S_LOCAL ; ASM: .asciz "o" -; ASM: .cv_def_range .Lfunc_begin2 .Lfunc_end2, "C\021\022\000\000\000\000\000\000\000" +; ASM: .cv_def_range .Lfunc_begin2 .Ltmp10, "C\021\021\000\000\000\000\000\000\000" ; OBJ-LABEL: {{.*}}Proc{{.*}}Sym { ; OBJ: Kind: S_GPROC32_ID (0x1147) @@ -166,7 +166,7 @@ ; OBJ: VarName: o ; OBJ: } ; OBJ: DefRangeSubfieldRegisterSym { -; OBJ: Register: ECX (0x12) +; OBJ: Register: EAX (0x11) ; OBJ: MayHaveNoName: 0 ; OBJ: OffsetInParent: 0 ; OBJ: LocalVariableAddrRange { Index: test/DebugInfo/X86/live-debug-values.ll =================================================================== --- test/DebugInfo/X86/live-debug-values.ll +++ test/DebugInfo/X86/live-debug-values.ll @@ -33,7 +33,7 @@ ; CHECK-NEXT: #DEBUG_VALUE: main:n <- $ebx ; Other register values have been clobbered. ; CHECK-NOT: #DEBUG_VALUE: -; CHECK: movl %ecx, m(%rip) +; CHECK: movl %esi, m(%rip) ; ModuleID = 'LiveDebugValues.c' source_filename = "test/DebugInfo/X86/live-debug-values.ll" Index: test/DebugInfo/X86/live-debug-variables.ll =================================================================== --- test/DebugInfo/X86/live-debug-variables.ll +++ test/DebugInfo/X86/live-debug-variables.ll @@ -25,7 +25,7 @@ ; CHECK: .debug_loc contents: ; CHECK-NEXT: 0x00000000: ; We currently emit an entry for the function prologue, too, which could be optimized away. -; CHECK: [0x000000000000001f, 0x000000000000003c): DW_OP_reg3 RBX +; CHECK: [0x0000000000000018, 0x0000000000000072): DW_OP_reg3 RBX ; We should only have one entry inside the function. ; CHECK-NOT: : Index: test/DebugInfo/X86/pieces-3.ll =================================================================== --- test/DebugInfo/X86/pieces-3.ll +++ test/DebugInfo/X86/pieces-3.ll @@ -17,11 +17,12 @@ ; ; CHECK: DW_TAG_formal_parameter [3] ; CHECK-NEXT: DW_AT_location [DW_FORM_data4] ( -; CHECK-NEXT: [0x0000000000000000, 0x0000000000000004): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4 -; CHECK-NEXT: [0x0000000000000004, 0x0000000000000008): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4) +; CHECK-NEXT: [0x0000000000000000, 0x0000000000000007): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4 +; CHECK-NEXT: [0x0000000000000007, 0x0000000000000007): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg0 RAX, DW_OP_piece 0x4) ; CHECK-NEXT: DW_AT_name {{.*}}"outer" ; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_location {{.*}}(DW_OP_reg4 RSI, DW_OP_piece 0x4) +; CHECK-NEXT: DW_AT_location [DW_FORM_data4] (0x00000044 +; CHECK-NEXT: [0x0000000000000007, 0x0000000000000007): DW_OP_reg0 RAX, DW_OP_piece 0x4) ; CHECK-NEXT: "i1" ; ModuleID = '/Volumes/Data/llvm/test/DebugInfo/X86/sroasplit-2.ll'