Index: lib/Target/X86/X86RegisterInfo.h =================================================================== --- lib/Target/X86/X86RegisterInfo.h +++ lib/Target/X86/X86RegisterInfo.h @@ -95,6 +95,8 @@ unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + bool enableMultipleCopyHints() const override { return true; } + /// getCalleeSavedRegs - Return a null-terminated list of all of the /// callee-save registers on this target. const MCPhysReg * Index: test/CodeGen/X86/GlobalISel/add-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/add-scalar.ll +++ test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -57,8 +57,9 @@ define i8 @test_add_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_add_i8: ; X64: # %bb.0: -; X64-NEXT: addb %dil, %sil ; X64-NEXT: movl %esi, %eax +; X64-NEXT: addb %dil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: test_add_i8: Index: test/CodeGen/X86/GlobalISel/and-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/and-scalar.ll +++ test/CodeGen/X86/GlobalISel/and-scalar.ll @@ -19,8 +19,9 @@ define i8 @test_and_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_and_i8: ; ALL: # %bb.0: -; ALL-NEXT: andb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andb %dil, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %ret = and i8 %arg1, %arg2 ret i8 %ret @@ -29,8 +30,9 @@ define i16 @test_and_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_and_i16: ; ALL: # %bb.0: -; ALL-NEXT: andw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq %ret = and i16 %arg1, %arg2 ret i16 %ret @@ -39,8 +41,8 @@ define i32 @test_and_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_and_i32: ; ALL: # %bb.0: -; ALL-NEXT: andl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: andl %edi, %eax ; ALL-NEXT: retq %ret = and i32 %arg1, %arg2 ret i32 %ret @@ -49,8 +51,8 @@ define i64 @test_and_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_and_i64: ; ALL: # %bb.0: -; ALL-NEXT: andq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: andq %rdi, %rax ; ALL-NEXT: retq %ret = and i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/ashr-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ashr-scalar.ll +++ test/CodeGen/X86/GlobalISel/ashr-scalar.ll @@ -4,10 +4,10 @@ define i64 @test_ashr_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_ashr_i64: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: sarq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %res = ashr i64 %arg1, %arg2 ret i64 %res @@ -16,10 +16,10 @@ define i64 @test_ashr_i64_imm(i64 %arg1) { ; X64-LABEL: test_ashr_i64_imm: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq $5, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: sarq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %res = ashr i64 %arg1, 5 ret i64 %res @@ -28,10 +28,10 @@ define i64 @test_ashr_i64_imm1(i64 %arg1) { ; X64-LABEL: test_ashr_i64_imm1: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq $1, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: sarq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %res = ashr i64 %arg1, 1 ret i64 %res @@ -40,10 +40,10 @@ define i32 @test_ashr_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_ashr_i32: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: sarl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl %cl, %eax ; X64-NEXT: retq %res = ashr i32 %arg1, %arg2 ret i32 %res @@ -52,10 +52,10 @@ define i32 @test_ashr_i32_imm(i32 %arg1) { ; X64-LABEL: test_ashr_i32_imm: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $5, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: sarl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl %cl, %eax ; X64-NEXT: retq %res = ashr i32 %arg1, 5 ret i32 %res @@ -64,10 +64,10 @@ define i32 @test_ashr_i32_imm1(i32 %arg1) { ; X64-LABEL: test_ashr_i32_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $1, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: sarl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl %cl, %eax ; X64-NEXT: retq %res = ashr i32 %arg1, 1 ret i32 %res @@ -76,10 +76,12 @@ define i16 @test_ashr_i16(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_ashr_i16: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cx killed $cx killed $ecx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: sarw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %a2 = trunc i32 %arg2 to i16 @@ -90,10 +92,11 @@ define i16 @test_ashr_i16_imm(i32 %arg1) { ; X64-LABEL: test_ashr_i16_imm: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movw $5, %cx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: sarw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %res = ashr i16 %a, 5 @@ -103,10 +106,11 @@ define i16 @test_ashr_i16_imm1(i32 %arg1) { ; X64-LABEL: test_ashr_i16_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movw $1, %cx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: sarw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %res = ashr i16 %a, 1 @@ -116,9 +120,11 @@ define i8 @test_ashr_i8(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_ashr_i8: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %a2 = trunc i32 %arg2 to i8 @@ -129,8 +135,9 @@ define i8 @test_ashr_i8_imm(i32 %arg1) { ; X64-LABEL: test_ashr_i8_imm: ; X64: # %bb.0: -; X64-NEXT: sarb $5, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarb $5, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %res = ashr i8 %a, 5 @@ -140,8 +147,9 @@ define i8 @test_ashr_i8_imm1(i32 %arg1) { ; X64-LABEL: test_ashr_i8_imm1: ; X64: # %bb.0: -; X64-NEXT: sarb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %res = ashr i8 %a, 1 @@ -151,12 +159,14 @@ define i1 @test_ashr_i1(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_ashr_i1: ; X64: # %bb.0: -; X64-NEXT: shlb $7, %dil -; X64-NEXT: sarb $7, %dil -; X64-NEXT: andb $1, %sil -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: shlb $7, %al +; X64-NEXT: sarb $7, %al +; X64-NEXT: andb $1, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 %a2 = trunc i32 %arg2 to i1 @@ -167,12 +177,13 @@ define i1 @test_ashr_i1_imm1(i32 %arg1) { ; X64-LABEL: test_ashr_i1_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movb $-1, %cl -; X64-NEXT: shlb $7, %dil -; X64-NEXT: sarb $7, %dil +; X64-NEXT: shlb $7, %al +; X64-NEXT: sarb $7, %al ; X64-NEXT: andb $1, %cl -; X64-NEXT: sarb %cl, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 %res = ashr i1 %a, 1 Index: test/CodeGen/X86/GlobalISel/binop.ll =================================================================== --- test/CodeGen/X86/GlobalISel/binop.ll +++ test/CodeGen/X86/GlobalISel/binop.ll @@ -7,8 +7,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_sub_i64: ; ALL: # %bb.0: -; ALL-NEXT: subq %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: subq %rsi, %rax ; ALL-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -17,8 +17,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_sub_i32: ; ALL: # %bb.0: -; ALL-NEXT: subl %esi, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: subl %esi, %eax ; ALL-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret Index: test/CodeGen/X86/GlobalISel/callingconv.ll =================================================================== --- test/CodeGen/X86/GlobalISel/callingconv.ll +++ test/CodeGen/X86/GlobalISel/callingconv.ll @@ -38,6 +38,7 @@ ; X64-LABEL: test_arg_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ret i8 %a } @@ -51,6 +52,7 @@ ; X64-LABEL: test_arg_i16: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ret i16 %a } @@ -114,8 +116,8 @@ ; X32: # %bb.0: ; X32-NEXT: subl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 16 -; X32-NEXT: movups {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: movaps %xmm2, %xmm0 +; X32-NEXT: movups {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: addl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl @@ -257,8 +259,8 @@ ; X32-NEXT: .cfi_def_cfa_offset 48 ; X32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill ; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: movdqu {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: movdqa %xmm2, %xmm0 +; X32-NEXT: movdqu {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: calll split_return_callee ; X32-NEXT: paddd (%esp), %xmm0 # 16-byte Folded Reload ; X32-NEXT: paddd {{[0-9]+}}(%esp), %xmm1 # 16-byte Folded Reload Index: test/CodeGen/X86/GlobalISel/ext-x86-64.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext-x86-64.ll +++ test/CodeGen/X86/GlobalISel/ext-x86-64.ll @@ -6,9 +6,8 @@ define i64 @test_zext_i1(i8 %a) { ; X64-LABEL: test_zext_i1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andq $1, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andq $1, %rax ; X64-NEXT: retq %val = trunc i8 %a to i1 %r = zext i1 %val to i64 @@ -18,14 +17,13 @@ define i64 @test_sext_i8(i8 %val) { ; X64-LABEL: test_sext_i8: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movq $56, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shlq %cl, %rdi +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: movq $56, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: sarq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %r = sext i8 %val to i64 ret i64 %r @@ -34,14 +32,13 @@ define i64 @test_sext_i16(i16 %val) { ; X64-LABEL: test_sext_i16: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movq $48, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shlq %cl, %rdi +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: movq $48, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: sarq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %r = sext i16 %val to i64 ret i64 %r Index: test/CodeGen/X86/GlobalISel/ext.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ext.ll +++ test/CodeGen/X86/GlobalISel/ext.ll @@ -5,8 +5,9 @@ define i8 @test_zext_i1toi8(i32 %a) { ; X64-LABEL: test_zext_i1toi8: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi8: @@ -23,8 +24,9 @@ define i16 @test_zext_i1toi16(i32 %a) { ; X64-LABEL: test_zext_i1toi16: ; X64: # %bb.0: -; X64-NEXT: andw $1, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andw $1, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1toi16: @@ -41,8 +43,8 @@ define i32 @test_zext_i1(i32 %a) { ; X64-LABEL: test_zext_i1: ; X64: # %bb.0: -; X64-NEXT: andl $1, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax ; X64-NEXT: retq ; ; X32-LABEL: test_zext_i1: @@ -86,13 +88,13 @@ define i32 @test_sext_i8(i8 %val) { ; X64-LABEL: test_sext_i8: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $24, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shll %cl, %edi +; X64-NEXT: shll %cl, %eax ; X64-NEXT: movl $24, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: sarl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl %cl, %eax ; X64-NEXT: retq ; ; X32-LABEL: test_sext_i8: @@ -106,13 +108,13 @@ define i32 @test_sext_i16(i16 %val) { ; X64-LABEL: test_sext_i16: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $16, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shll %cl, %edi +; X64-NEXT: shll %cl, %eax ; X64-NEXT: movl $16, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: sarl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl %cl, %eax ; X64-NEXT: retq ; ; X32-LABEL: test_sext_i16: Index: test/CodeGen/X86/GlobalISel/lshr-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/lshr-scalar.ll +++ test/CodeGen/X86/GlobalISel/lshr-scalar.ll @@ -4,10 +4,10 @@ define i64 @test_lshr_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_lshr_i64: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shrq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %res = lshr i64 %arg1, %arg2 ret i64 %res @@ -16,10 +16,10 @@ define i64 @test_lshr_i64_imm(i64 %arg1) { ; X64-LABEL: test_lshr_i64_imm: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq $5, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shrq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %res = lshr i64 %arg1, 5 ret i64 %res @@ -28,10 +28,10 @@ define i64 @test_lshr_i64_imm1(i64 %arg1) { ; X64-LABEL: test_lshr_i64_imm1: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq $1, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shrq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %res = lshr i64 %arg1, 1 ret i64 %res @@ -40,10 +40,10 @@ define i32 @test_lshr_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_lshr_i32: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shrl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl %cl, %eax ; X64-NEXT: retq %res = lshr i32 %arg1, %arg2 ret i32 %res @@ -52,10 +52,10 @@ define i32 @test_lshr_i32_imm(i32 %arg1) { ; X64-LABEL: test_lshr_i32_imm: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $5, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shrl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl %cl, %eax ; X64-NEXT: retq %res = lshr i32 %arg1, 5 ret i32 %res @@ -64,10 +64,10 @@ define i32 @test_lshr_i32_imm1(i32 %arg1) { ; X64-LABEL: test_lshr_i32_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $1, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shrl %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl %cl, %eax ; X64-NEXT: retq %res = lshr i32 %arg1, 1 ret i32 %res @@ -76,10 +76,12 @@ define i16 @test_lshr_i16(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_lshr_i16: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cx killed $cx killed $ecx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: shrw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %a2 = trunc i32 %arg2 to i16 @@ -90,10 +92,11 @@ define i16 @test_lshr_i16_imm(i32 %arg1) { ; X64-LABEL: test_lshr_i16_imm: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movw $5, %cx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: shrw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %res = lshr i16 %a, 5 @@ -103,10 +106,11 @@ define i16 @test_lshr_i16_imm1(i32 %arg1) { ; X64-LABEL: test_lshr_i16_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movw $1, %cx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: shrw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %res = lshr i16 %a, 1 @@ -116,9 +120,11 @@ define i8 @test_lshr_i8(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_lshr_i8: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %a2 = trunc i32 %arg2 to i8 @@ -129,8 +135,9 @@ define i8 @test_lshr_i8_imm(i32 %arg1) { ; X64-LABEL: test_lshr_i8_imm: ; X64: # %bb.0: -; X64-NEXT: shrb $5, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrb $5, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %res = lshr i8 %a, 5 @@ -140,8 +147,9 @@ define i8 @test_lshr_i8_imm1(i32 %arg1) { ; X64-LABEL: test_lshr_i8_imm1: ; X64: # %bb.0: -; X64-NEXT: shrb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %res = lshr i8 %a, 1 @@ -151,11 +159,13 @@ define i1 @test_lshr_i1(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_lshr_i1: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: andb $1, %sil -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $1, %al +; X64-NEXT: andb $1, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 %a2 = trunc i32 %arg2 to i1 @@ -166,11 +176,12 @@ define i1 @test_lshr_i1_imm1(i32 %arg1) { ; X64-LABEL: test_lshr_i1_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movb $-1, %cl -; X64-NEXT: andb $1, %dil +; X64-NEXT: andb $1, %al ; X64-NEXT: andb $1, %cl -; X64-NEXT: shrb %cl, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 %res = lshr i1 %a, 1 Index: test/CodeGen/X86/GlobalISel/memop-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/memop-scalar.ll +++ test/CodeGen/X86/GlobalISel/memop-scalar.ll @@ -82,9 +82,9 @@ define i1 * @test_store_i1(i1 %val, i1 * %p1) { ; ALL-LABEL: test_store_i1: ; ALL: # %bb.0: +; ALL-NEXT: movq %rsi, %rax ; ALL-NEXT: andb $1, %dil ; ALL-NEXT: movb %dil, (%rsi) -; ALL-NEXT: movq %rsi, %rax ; ALL-NEXT: retq store i1 %val, i1* %p1 ret i1 * %p1; @@ -93,8 +93,8 @@ define i32 * @test_store_i32(i32 %val, i32 * %p1) { ; ALL-LABEL: test_store_i32: ; ALL: # %bb.0: -; ALL-NEXT: movl %edi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movl %edi, (%rsi) ; ALL-NEXT: retq store i32 %val, i32* %p1 ret i32 * %p1; @@ -103,8 +103,8 @@ define i64 * @test_store_i64(i64 %val, i64 * %p1) { ; ALL-LABEL: test_store_i64: ; ALL: # %bb.0: -; ALL-NEXT: movq %rdi, (%rsi) ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: movq %rdi, (%rsi) ; ALL-NEXT: retq store i64 %val, i64* %p1 ret i64 * %p1; @@ -114,15 +114,15 @@ ; ; SSE_FAST-LABEL: test_store_float: ; SSE_FAST: # %bb.0: -; SSE_FAST-NEXT: movd %xmm0, %eax -; SSE_FAST-NEXT: movl %eax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movd %xmm0, %ecx +; SSE_FAST-NEXT: movl %ecx, (%rdi) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_float: ; SSE_GREEDY: # %bb.0: -; SSE_GREEDY-NEXT: movss %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movss %xmm0, (%rdi) ; SSE_GREEDY-NEXT: retq store float %val, float* %p1 ret float * %p1; @@ -132,15 +132,15 @@ ; ; SSE_FAST-LABEL: test_store_double: ; SSE_FAST: # %bb.0: -; SSE_FAST-NEXT: movq %xmm0, %rax -; SSE_FAST-NEXT: movq %rax, (%rdi) ; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: movq %xmm0, %rcx +; SSE_FAST-NEXT: movq %rcx, (%rdi) ; SSE_FAST-NEXT: retq ; ; SSE_GREEDY-LABEL: test_store_double: ; SSE_GREEDY: # %bb.0: -; SSE_GREEDY-NEXT: movsd %xmm0, (%rdi) ; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: movsd %xmm0, (%rdi) ; SSE_GREEDY-NEXT: retq store double %val, double* %p1 ret double * %p1; Index: test/CodeGen/X86/GlobalISel/mul-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/mul-scalar.ll +++ test/CodeGen/X86/GlobalISel/mul-scalar.ll @@ -8,31 +8,32 @@ ;} define i16 @test_mul_i16(i16 %arg1, i16 %arg2) { -; X64-LABEL: test_mul_i16: -; X64: # %bb.0: -; X64-NEXT: imulw %di, %si -; X64-NEXT: movl %esi, %eax -; X64-NEXT: retq +; ALL-LABEL: test_mul_i16: +; ALL: # %bb.0: +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: imulw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax +; ALL-NEXT: retq %ret = mul i16 %arg1, %arg2 ret i16 %ret } define i32 @test_mul_i32(i32 %arg1, i32 %arg2) { -; X64-LABEL: test_mul_i32: -; X64: # %bb.0: -; X64-NEXT: imull %edi, %esi -; X64-NEXT: movl %esi, %eax -; X64-NEXT: retq +; ALL-LABEL: test_mul_i32: +; ALL: # %bb.0: +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: imull %edi, %eax +; ALL-NEXT: retq %ret = mul i32 %arg1, %arg2 ret i32 %ret } define i64 @test_mul_i64(i64 %arg1, i64 %arg2) { -; X64-LABEL: test_mul_i64: -; X64: # %bb.0: -; X64-NEXT: imulq %rdi, %rsi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: retq +; ALL-LABEL: test_mul_i64: +; ALL: # %bb.0: +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: imulq %rdi, %rax +; ALL-NEXT: retq %ret = mul i64 %arg1, %arg2 ret i64 %ret } Index: test/CodeGen/X86/GlobalISel/or-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/or-scalar.ll +++ test/CodeGen/X86/GlobalISel/or-scalar.ll @@ -19,8 +19,9 @@ define i8 @test_or_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_or_i8: ; ALL: # %bb.0: -; ALL-NEXT: orb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orb %dil, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %ret = or i8 %arg1, %arg2 ret i8 %ret @@ -29,8 +30,9 @@ define i16 @test_or_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_or_i16: ; ALL: # %bb.0: -; ALL-NEXT: orw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq %ret = or i16 %arg1, %arg2 ret i16 %ret @@ -39,8 +41,8 @@ define i32 @test_or_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_or_i32: ; ALL: # %bb.0: -; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: orl %edi, %eax ; ALL-NEXT: retq %ret = or i32 %arg1, %arg2 ret i32 %ret @@ -49,8 +51,8 @@ define i64 @test_or_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_or_i64: ; ALL: # %bb.0: -; ALL-NEXT: orq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: orq %rdi, %rax ; ALL-NEXT: retq %ret = or i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/GlobalISel/phi.ll =================================================================== --- test/CodeGen/X86/GlobalISel/phi.ll +++ test/CodeGen/X86/GlobalISel/phi.ll @@ -4,15 +4,18 @@ define i8 @test_i8(i32 %a, i8 %f, i8 %t) { ; ALL-LABEL: test_i8: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al -; ALL-NEXT: jne .LBB0_2 -; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movl %edx, %esi -; ALL-NEXT: .LBB0_2: # %cond.end +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl +; ALL-NEXT: je .LBB0_2 +; ALL-NEXT: # %bb.1: ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: def $al killed $al killed $eax +; ALL-NEXT: retq +; ALL-NEXT: .LBB0_2: # %cond.false +; ALL-NEXT: movl %edx, %eax +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -32,15 +35,18 @@ define i16 @test_i16(i32 %a, i16 %f, i16 %t) { ; ALL-LABEL: test_i16: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al -; ALL-NEXT: jne .LBB1_2 -; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movl %edx, %esi -; ALL-NEXT: .LBB1_2: # %cond.end +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl +; ALL-NEXT: je .LBB1_2 +; ALL-NEXT: # %bb.1: ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax +; ALL-NEXT: retq +; ALL-NEXT: .LBB1_2: # %cond.false +; ALL-NEXT: movl %edx, %eax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -60,15 +66,15 @@ define i32 @test_i32(i32 %a, i32 %f, i32 %t) { ; ALL-LABEL: test_i32: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB2_2 ; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movl %edx, %esi +; ALL-NEXT: movl %edx, %eax ; ALL-NEXT: .LBB2_2: # %cond.end -; ALL-NEXT: movl %esi, %eax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 @@ -88,15 +94,15 @@ define i64 @test_i64(i32 %a, i64 %f, i64 %t) { ; ALL-LABEL: test_i64: ; ALL: # %bb.0: # %entry -; ALL-NEXT: xorl %eax, %eax -; ALL-NEXT: cmpl %eax, %edi -; ALL-NEXT: setg %al -; ALL-NEXT: testb $1, %al +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorl %ecx, %ecx +; ALL-NEXT: cmpl %ecx, %edi +; ALL-NEXT: setg %cl +; ALL-NEXT: testb $1, %cl ; ALL-NEXT: jne .LBB3_2 ; ALL-NEXT: # %bb.1: # %cond.false -; ALL-NEXT: movq %rdx, %rsi +; ALL-NEXT: movq %rdx, %rax ; ALL-NEXT: .LBB3_2: # %cond.end -; ALL-NEXT: movq %rsi, %rax ; ALL-NEXT: retq entry: %cmp = icmp sgt i32 %a, 0 Index: test/CodeGen/X86/GlobalISel/ptrtoint.ll =================================================================== --- test/CodeGen/X86/GlobalISel/ptrtoint.ll +++ test/CodeGen/X86/GlobalISel/ptrtoint.ll @@ -4,7 +4,8 @@ define i1 @ptrtoint_s1_p0(i64* %p) { ; CHECK-LABEL: ptrtoint_s1_p0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $al killed $al killed $rax ; CHECK-NEXT: retq entry: %0 = ptrtoint i64* %p to i1 @@ -14,7 +15,8 @@ define i8 @ptrtoint_s8_p0(i64* %p) { ; CHECK-LABEL: ptrtoint_s8_p0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $al killed $al killed $rax ; CHECK-NEXT: retq entry: %0 = ptrtoint i64* %p to i8 @@ -24,7 +26,8 @@ define i16 @ptrtoint_s16_p0(i64* %p) { ; CHECK-LABEL: ptrtoint_s16_p0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $ax killed $ax killed $rax ; CHECK-NEXT: retq entry: %0 = ptrtoint i64* %p to i16 @@ -34,7 +37,8 @@ define i32 @ptrtoint_s32_p0(i64* %p) { ; CHECK-LABEL: ptrtoint_s32_p0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq entry: %0 = ptrtoint i64* %p to i32 Index: test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll =================================================================== --- test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll +++ test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64 define i16 @test_shl_i4(i16 %v, i16 %a, i16 %b) { @@ -7,6 +8,17 @@ ; %v: 77 (0000 0000 0100 1101) ; %a: 74 (0000 0000 0100 1010) ; %b: 72 (0000 0000 0100 1000) +; X64-LABEL: test_shl_i4: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %edx, %ecx +; X64-NEXT: addb %sil, %cl +; X64-NEXT: andb $15, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlb %cl, %al +; X64-NEXT: andw $15, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq %v.t = trunc i16 %v to i4 ; %v.t: 13 (1101) %a.t = trunc i16 %a to i4 ; %a.t: 10 (1010) %b.t = trunc i16 %b to i4 ; %b.t: 8 (1000) @@ -16,52 +28,27 @@ ; %r: 4 (0000 0000 0000 0100) ret i16 %r -; X64-LABEL: test_shl_i4 -; ; %di: 77 (0000 0000 0100 1101) ; %si: 74 (0000 0000 0100 1010) ; %dx: 72 (0000 0000 0100 1000) -; -; X64: # %bb.0: -; -; X64-NEXT: addb %sil, %dl ; %dx: 146 (0000 0000 1001 0010) -; -; X64-NEXT: andb $15, %dl ; %dx: 2 (0000 0000 0000 0010) -; -; X64-NEXT: movl %edx, %ecx ; %cx: 2 (0000 0000 0000 0010) -; -; X64-NEXT: shlb %cl, %dil ; %di: 52 (0000 0000 0011 0100) -; -; X64-NEXT: andw $15, %di ; %di: 4 (0000 0000 0000 0100) -; -; X64-NEXT: movl %edi, %eax ; %ax: 4 (0000 0000 0000 0100) -; -; X64-NEXT: retq -; ; Let's pretend that legalizing G_SHL by widening its second ; source operand is done via G_ANYEXT rather than G_ZEXT and ; see what happens: -; ; addb %sil, %dl ; %dx: 146 (0000 0000 1001 0010) -; ; movl %edx, %ecx ; %cx: 146 (0000 0000 1001 0010) -; ; shlb %cl, %dil ; %di: 0 (0000 0000 0000 0000) -; ; andw $15, %di ; %di: 0 (0000 0000 0000 0000) -; ; movl %edi, %eax ; %ax: 0 (0000 0000 0000 0000) -; ; retq } Index: test/CodeGen/X86/GlobalISel/shl-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/shl-scalar.ll +++ test/CodeGen/X86/GlobalISel/shl-scalar.ll @@ -4,10 +4,10 @@ define i64 @test_shl_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_shl_i64: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shlq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %res = shl i64 %arg1, %arg2 ret i64 %res @@ -16,10 +16,10 @@ define i64 @test_shl_i64_imm(i64 %arg1) { ; X64-LABEL: test_shl_i64_imm: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq $5, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shlq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %res = shl i64 %arg1, 5 ret i64 %res @@ -28,10 +28,10 @@ define i64 @test_shl_i64_imm1(i64 %arg1) { ; X64-LABEL: test_shl_i64_imm1: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq $1, %rcx ; X64-NEXT: # kill: def $cl killed $rcx -; X64-NEXT: shlq %cl, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %res = shl i64 %arg1, 1 ret i64 %res @@ -40,10 +40,10 @@ define i32 @test_shl_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_shl_i32: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %res = shl i32 %arg1, %arg2 ret i32 %res @@ -52,10 +52,10 @@ define i32 @test_shl_i32_imm(i32 %arg1) { ; X64-LABEL: test_shl_i32_imm: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $5, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %res = shl i32 %arg1, 5 ret i32 %res @@ -64,10 +64,10 @@ define i32 @test_shl_i32_imm1(i32 %arg1) { ; X64-LABEL: test_shl_i32_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl $1, %ecx ; X64-NEXT: # kill: def $cl killed $ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %res = shl i32 %arg1, 1 ret i32 %res @@ -76,10 +76,12 @@ define i16 @test_shl_i16(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_shl_i16: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cx killed $cx killed $ecx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: shlw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %a2 = trunc i32 %arg2 to i16 @@ -90,10 +92,11 @@ define i16 @test_shl_i16_imm(i32 %arg1) { ; X64-LABEL: test_shl_i16_imm: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movw $5, %cx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: shlw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %res = shl i16 %a, 5 @@ -103,10 +106,11 @@ define i16 @test_shl_i16_imm1(i32 %arg1) { ; X64-LABEL: test_shl_i16_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movw $1, %cx ; X64-NEXT: # kill: def $cl killed $cx -; X64-NEXT: shlw %cl, %di -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i16 %res = shl i16 %a, 1 @@ -116,9 +120,11 @@ define i8 @test_shl_i8(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_shl_i8: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %a2 = trunc i32 %arg2 to i8 @@ -129,8 +135,9 @@ define i8 @test_shl_i8_imm(i32 %arg1) { ; X64-LABEL: test_shl_i8_imm: ; X64: # %bb.0: -; X64-NEXT: shlb $5, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb $5, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %res = shl i8 %a, 5 @@ -140,8 +147,9 @@ define i8 @test_shl_i8_imm1(i32 %arg1) { ; X64-LABEL: test_shl_i8_imm1: ; X64: # %bb.0: -; X64-NEXT: addb %dil, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: addb %al, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i8 %res = shl i8 %a, 1 @@ -151,10 +159,12 @@ define i1 @test_shl_i1(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_shl_i1: ; X64: # %bb.0: -; X64-NEXT: andb $1, %sil -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $1, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 %a2 = trunc i32 %arg2 to i1 @@ -165,10 +175,11 @@ define i1 @test_shl_i1_imm1(i32 %arg1) { ; X64-LABEL: test_shl_i1_imm1: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movb $-1, %cl ; X64-NEXT: andb $1, %cl -; X64-NEXT: shlb %cl, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 %res = shl i1 %a, 1 Index: test/CodeGen/X86/GlobalISel/sub-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -4,8 +4,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X64-LABEL: test_sub_i64: ; X64: # %bb.0: -; X64-NEXT: subq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax ; X64-NEXT: retq %ret = sub i64 %arg1, %arg2 ret i64 %ret @@ -14,8 +14,8 @@ define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { ; X64-LABEL: test_sub_i32: ; X64: # %bb.0: -; X64-NEXT: subl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax ; X64-NEXT: retq %ret = sub i32 %arg1, %arg2 ret i32 %ret @@ -24,8 +24,9 @@ define i16 @test_sub_i16(i16 %arg1, i16 %arg2) { ; X64-LABEL: test_sub_i16: ; X64: # %bb.0: -; X64-NEXT: subw %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subw %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %ret = sub i16 %arg1, %arg2 ret i16 %ret @@ -34,8 +35,9 @@ define i8 @test_sub_i8(i8 %arg1, i8 %arg2) { ; X64-LABEL: test_sub_i8: ; X64: # %bb.0: -; X64-NEXT: subb %sil, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: subb %sil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %ret = sub i8 %arg1, %arg2 ret i8 %ret Index: test/CodeGen/X86/GlobalISel/trunc.ll =================================================================== --- test/CodeGen/X86/GlobalISel/trunc.ll +++ test/CodeGen/X86/GlobalISel/trunc.ll @@ -5,6 +5,7 @@ ; CHECK-LABEL: trunc_i32toi1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r = trunc i32 %a to i1 ret i1 %r @@ -14,6 +15,7 @@ ; CHECK-LABEL: trunc_i32toi8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r = trunc i32 %a to i8 ret i8 %r @@ -23,6 +25,7 @@ ; CHECK-LABEL: trunc_i32toi16: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %r = trunc i32 %a to i16 ret i16 %r @@ -31,7 +34,8 @@ define i8 @trunc_i64toi8(i64 %a) { ; CHECK-LABEL: trunc_i64toi8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $al killed $al killed $rax ; CHECK-NEXT: retq %r = trunc i64 %a to i8 ret i8 %r @@ -40,7 +44,8 @@ define i16 @trunc_i64toi16(i64 %a) { ; CHECK-LABEL: trunc_i64toi16: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $ax killed $ax killed $rax ; CHECK-NEXT: retq %r = trunc i64 %a to i16 ret i16 %r @@ -49,7 +54,8 @@ define i32 @trunc_i64toi32(i64 %a) { ; CHECK-LABEL: trunc_i64toi32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq %r = trunc i64 %a to i32 ret i32 %r Index: test/CodeGen/X86/GlobalISel/undef.ll =================================================================== --- test/CodeGen/X86/GlobalISel/undef.ll +++ test/CodeGen/X86/GlobalISel/undef.ll @@ -11,8 +11,9 @@ define i8 @test2(i8 %a) { ; ALL-LABEL: test2: ; ALL: # %bb.0: -; ALL-NEXT: addb %al, %dil ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: addb %al, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %r = add i8 %a, undef ret i8 %r Index: test/CodeGen/X86/GlobalISel/xor-scalar.ll =================================================================== --- test/CodeGen/X86/GlobalISel/xor-scalar.ll +++ test/CodeGen/X86/GlobalISel/xor-scalar.ll @@ -19,8 +19,9 @@ define i8 @test_xor_i8(i8 %arg1, i8 %arg2) { ; ALL-LABEL: test_xor_i8: ; ALL: # %bb.0: -; ALL-NEXT: xorb %dil, %sil ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorb %dil, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %ret = xor i8 %arg1, %arg2 ret i8 %ret @@ -29,8 +30,9 @@ define i16 @test_xor_i16(i16 %arg1, i16 %arg2) { ; ALL-LABEL: test_xor_i16: ; ALL: # %bb.0: -; ALL-NEXT: xorw %di, %si ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorw %di, %ax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax ; ALL-NEXT: retq %ret = xor i16 %arg1, %arg2 ret i16 %ret @@ -39,8 +41,8 @@ define i32 @test_xor_i32(i32 %arg1, i32 %arg2) { ; ALL-LABEL: test_xor_i32: ; ALL: # %bb.0: -; ALL-NEXT: xorl %edi, %esi ; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: xorl %edi, %eax ; ALL-NEXT: retq %ret = xor i32 %arg1, %arg2 ret i32 %ret @@ -49,8 +51,8 @@ define i64 @test_xor_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_xor_i64: ; ALL: # %bb.0: -; ALL-NEXT: xorq %rdi, %rsi ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: xorq %rdi, %rax ; ALL-NEXT: retq %ret = xor i64 %arg1, %arg2 ret i64 %ret Index: test/CodeGen/X86/add.ll =================================================================== --- test/CodeGen/X86/add.ll +++ test/CodeGen/X86/add.ll @@ -16,14 +16,14 @@ ; ; X64-LINUX-LABEL: test1: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: subl $-128, %edi ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: subl $-128, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test1: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: subl $-128, %ecx ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: subl $-128, %eax ; X64-WIN32-NEXT: retq entry: %b = add i32 %a, 128 @@ -38,14 +38,14 @@ ; ; X64-LINUX-LABEL: test2: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: subq $-2147483648, %rdi # imm = 0x80000000 ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test2: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: subq $-2147483648, %rcx # imm = 0x80000000 ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-2147483648, %rax # imm = 0x80000000 ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 2147483648 @@ -60,14 +60,14 @@ ; ; X64-LINUX-LABEL: test3: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: subq $-128, %rdi ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: subq $-128, %rax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test3: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: subq $-128, %rcx ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: subq $-128, %rax ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 128 @@ -204,16 +204,16 @@ ; ; X64-LINUX-LABEL: test7: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: addl %esi, %edi -; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: addl %esi, %eax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test7: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: addl %edx, %ecx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: addl %edx, %eax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -233,16 +233,16 @@ ; ; X64-LINUX-LABEL: test8: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: addq %rsi, %rdi -; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: movq %rdi, %rax +; X64-LINUX-NEXT: addq %rsi, %rax +; X64-LINUX-NEXT: setb %dl ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test8: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: addq %rdx, %rcx -; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: movq %rcx, %rax +; X64-WIN32-NEXT: addq %rdx, %rax +; X64-WIN32-NEXT: setb %dl ; X64-WIN32-NEXT: retq entry: %extleft = zext i64 %left to i65 @@ -268,20 +268,20 @@ ; ; X64-LINUX-LABEL: test9: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: xorl %eax, %eax -; X64-LINUX-NEXT: cmpl $10, %edi -; X64-LINUX-NEXT: sete %al -; X64-LINUX-NEXT: subl %eax, %esi ; X64-LINUX-NEXT: movl %esi, %eax +; X64-LINUX-NEXT: xorl %ecx, %ecx +; X64-LINUX-NEXT: cmpl $10, %edi +; X64-LINUX-NEXT: sete %cl +; X64-LINUX-NEXT: subl %ecx, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test9: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: xorl %eax, %eax -; X64-WIN32-NEXT: cmpl $10, %ecx -; X64-WIN32-NEXT: sete %al -; X64-WIN32-NEXT: subl %eax, %edx ; X64-WIN32-NEXT: movl %edx, %eax +; X64-WIN32-NEXT: xorl %edx, %edx +; X64-WIN32-NEXT: cmpl $10, %ecx +; X64-WIN32-NEXT: sete %dl +; X64-WIN32-NEXT: subl %edx, %eax ; X64-WIN32-NEXT: retq entry: %cmp = icmp eq i32 %x, 10 @@ -392,14 +392,14 @@ ; ; X64-LINUX-LABEL: inc_not: ; X64-LINUX: # %bb.0: -; X64-LINUX-NEXT: negl %edi ; X64-LINUX-NEXT: movl %edi, %eax +; X64-LINUX-NEXT: negl %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: inc_not: ; X64-WIN32: # %bb.0: -; X64-WIN32-NEXT: negl %ecx ; X64-WIN32-NEXT: movl %ecx, %eax +; X64-WIN32-NEXT: negl %eax ; X64-WIN32-NEXT: retq %nota = xor i32 %a, -1 %r = add i32 %nota, 1 Index: test/CodeGen/X86/addcarry.ll =================================================================== --- test/CodeGen/X86/addcarry.ll +++ test/CodeGen/X86/addcarry.ll @@ -4,9 +4,9 @@ define i128 @add128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: add128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq %rdx, %rdi -; CHECK-NEXT: adcq %rcx, %rsi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: adcq %rcx, %rsi ; CHECK-NEXT: movq %rsi, %rdx ; CHECK-NEXT: retq entry: @@ -17,6 +17,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: add256: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq %r9, %rsi ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx @@ -25,7 +26,6 @@ ; CHECK-NEXT: movq %rsi, (%rdi) ; CHECK-NEXT: movq %rcx, 16(%rdi) ; CHECK-NEXT: movq %r8, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %0 = add i256 %a, %b @@ -141,6 +141,7 @@ define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) { ; CHECK-LABEL: pr31719: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx ; CHECK-NEXT: adcq 8(%rsi), %rcx ; CHECK-NEXT: adcq 16(%rsi), %r8 @@ -149,7 +150,6 @@ ; CHECK-NEXT: movq %rcx, 8(%rdi) ; CHECK-NEXT: movq %r8, 16(%rdi) ; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %0 = extractvalue %scalar %arg.b, 0 @@ -236,9 +236,9 @@ define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: shiftadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: adcq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: adcq %rcx, %rax ; CHECK-NEXT: retq entry: %0 = zext i64 %a to i128 @@ -256,23 +256,23 @@ define %S @readd(%S* nocapture readonly %this, %S %arg.b) { ; CHECK-LABEL: readd: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: movq 8(%rsi), %r10 -; CHECK-NEXT: adcq $0, %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: addq %rcx, %r10 -; CHECK-NEXT: adcq 16(%rsi), %rax +; CHECK-NEXT: movq 8(%rsi), %r11 +; CHECK-NEXT: adcq $0, %r11 +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %edi +; CHECK-NEXT: addq %rcx, %r11 +; CHECK-NEXT: adcq 16(%rsi), %rdi ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %r8, %rdi ; CHECK-NEXT: adcq 24(%rsi), %rcx ; CHECK-NEXT: addq %r9, %rcx -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %r10, 8(%rdi) -; CHECK-NEXT: movq %rax, 16(%rdi) -; CHECK-NEXT: movq %rcx, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %r11, 8(%rax) +; CHECK-NEXT: movq %rdi, 16(%rax) +; CHECK-NEXT: movq %rcx, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 @@ -321,10 +321,10 @@ define i128 @addcarry1_not(i128 %n) { ; CHECK-LABEL: addcarry1_not: ; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: negq %rdi +; CHECK-NEXT: negq %rax ; CHECK-NEXT: sbbq %rsi, %rdx -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq %1 = xor i128 %n, -1 %2 = add i128 %1, 1 Index: test/CodeGen/X86/and-encoding.ll =================================================================== --- test/CodeGen/X86/and-encoding.ll +++ test/CodeGen/X86/and-encoding.ll @@ -46,9 +46,9 @@ define i32 @lopped32_32to8(i32 %x) { ; CHECK-LABEL: lopped32_32to8: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $4, %edi # encoding: [0xc1,0xef,0x04] -; CHECK-NEXT: andl $-16, %edi # encoding: [0x83,0xe7,0xf0] ; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: shrl $4, %eax # encoding: [0xc1,0xe8,0x04] +; CHECK-NEXT: andl $-16, %eax # encoding: [0x83,0xe0,0xf0] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i32 %x, 4 %and = and i32 %shr, 268435440 @@ -60,9 +60,9 @@ define i64 @lopped64_32to8(i64 %x) { ; CHECK-LABEL: lopped64_32to8: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $36, %rdi # encoding: [0x48,0xc1,0xef,0x24] -; CHECK-NEXT: andl $-16, %edi # encoding: [0x83,0xe7,0xf0] ; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: shrq $36, %rax # encoding: [0x48,0xc1,0xe8,0x24] +; CHECK-NEXT: andl $-16, %eax # encoding: [0x83,0xe0,0xf0] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 36 %and = and i64 %shr, 268435440 @@ -74,9 +74,9 @@ define i64 @lopped64_64to8(i64 %x) { ; CHECK-LABEL: lopped64_64to8: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04] -; CHECK-NEXT: andq $-16, %rdi # encoding: [0x48,0x83,0xe7,0xf0] ; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: shrq $4, %rax # encoding: [0x48,0xc1,0xe8,0x04] +; CHECK-NEXT: andq $-16, %rax # encoding: [0x48,0x83,0xe0,0xf0] ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 4 %and = and i64 %shr, 1152921504606846960 @@ -88,10 +88,10 @@ define i64 @lopped64_64to32(i64 %x) { ; CHECK-LABEL: lopped64_64to32: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04] -; CHECK-NEXT: andq $-983056, %rdi # encoding: [0x48,0x81,0xe7,0xf0,0xff,0xf0,0xff] -; CHECK-NEXT: # imm = 0xFFF0FFF0 ; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: shrq $4, %rax # encoding: [0x48,0xc1,0xe8,0x04] +; CHECK-NEXT: andq $-983056, %rax # encoding: [0x48,0x25,0xf0,0xff,0xf0,0xff] +; CHECK-NEXT: # imm = 0xFFF0FFF0 ; CHECK-NEXT: retq # encoding: [0xc3] %shr = lshr i64 %x, 4 %and = and i64 %shr, 1152921504605863920 Index: test/CodeGen/X86/andimm8.ll =================================================================== --- test/CodeGen/X86/andimm8.ll +++ test/CodeGen/X86/andimm8.ll @@ -14,9 +14,8 @@ ; ; X64-LABEL: bra: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andl $-64, %edi # encoding: [0x83,0xe7,0xc0] -; X64-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X64-NEXT: andl $-64, %eax # encoding: [0x83,0xe0,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %t1 = zext i32 %zed to i64 %t2 = and i64 %t1, 4294967232 @@ -57,8 +56,8 @@ ; ; X64-LABEL: bar: ; X64: # %bb.0: -; X64-NEXT: andl $42, %edi # encoding: [0x83,0xe7,0x2a] ; X64-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; X64-NEXT: andl $42, %eax # encoding: [0x83,0xe0,0x2a] ; X64-NEXT: retq # encoding: [0xc3] %t1 = and i64 %zed, 42 ret i64 %t1 @@ -75,9 +74,9 @@ ; ; X64-LABEL: baz: ; X64: # %bb.0: -; X64-NEXT: andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f] -; X64-NEXT: # imm = 0x7FFFFFFF ; X64-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; X64-NEXT: andl $2147483647, %eax # encoding: [0x25,0xff,0xff,0xff,0x7f] +; X64-NEXT: # imm = 0x7FFFFFFF ; X64-NEXT: retq # encoding: [0xc3] %t1 = and i64 %zed, 2147483647 ret i64 %t1 Index: test/CodeGen/X86/anyext.ll =================================================================== --- test/CodeGen/X86/anyext.ll +++ test/CodeGen/X86/anyext.ll @@ -41,8 +41,9 @@ ; ; X64-LABEL: bar: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: divw %si ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: andl $1, %eax Index: test/CodeGen/X86/apm.ll =================================================================== --- test/CodeGen/X86/apm.ll +++ test/CodeGen/X86/apm.ll @@ -17,8 +17,8 @@ ; ; X64-LABEL: foo: ; X64: # %bb.0: # %entry -; X64-NEXT: leaq (%rdi), %rax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: leaq (%rdi), %rax ; X64-NEXT: monitor ; X64-NEXT: retq ; @@ -46,8 +46,8 @@ ; ; X64-LABEL: bar: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %edi, %ecx ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx ; X64-NEXT: mwait ; X64-NEXT: retq ; Index: test/CodeGen/X86/atomic-eflags-reuse.ll =================================================================== --- test/CodeGen/X86/atomic-eflags-reuse.ll +++ test/CodeGen/X86/atomic-eflags-reuse.ll @@ -5,16 +5,16 @@ define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_add_1_cmov_slt: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock incq (%rdi) -; FASTINCDEC-NEXT: cmovgl %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock incq (%rdi) +; FASTINCDEC-NEXT: cmovgl %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_add_1_cmov_slt: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $1, (%rdi) -; SLOWINCDEC-NEXT: cmovgl %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $1, (%rdi) +; SLOWINCDEC-NEXT: cmovgl %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -26,16 +26,16 @@ define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_add_1_cmov_sge: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock incq (%rdi) -; FASTINCDEC-NEXT: cmovlel %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock incq (%rdi) +; FASTINCDEC-NEXT: cmovlel %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_add_1_cmov_sge: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $1, (%rdi) -; SLOWINCDEC-NEXT: cmovlel %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $1, (%rdi) +; SLOWINCDEC-NEXT: cmovlel %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -47,16 +47,16 @@ define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_sub_1_cmov_sle: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock decq (%rdi) -; FASTINCDEC-NEXT: cmovgel %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock decq (%rdi) +; FASTINCDEC-NEXT: cmovgel %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_sub_1_cmov_sle: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) -; SLOWINCDEC-NEXT: cmovgel %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) +; SLOWINCDEC-NEXT: cmovgel %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -68,16 +68,16 @@ define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; FASTINCDEC-LABEL: test_sub_1_cmov_sgt: ; FASTINCDEC: # %bb.0: # %entry -; FASTINCDEC-NEXT: lock decq (%rdi) -; FASTINCDEC-NEXT: cmovll %edx, %esi ; FASTINCDEC-NEXT: movl %esi, %eax +; FASTINCDEC-NEXT: lock decq (%rdi) +; FASTINCDEC-NEXT: cmovll %edx, %eax ; FASTINCDEC-NEXT: retq ; ; SLOWINCDEC-LABEL: test_sub_1_cmov_sgt: ; SLOWINCDEC: # %bb.0: # %entry -; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) -; SLOWINCDEC-NEXT: cmovll %edx, %esi ; SLOWINCDEC-NEXT: movl %esi, %eax +; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) +; SLOWINCDEC-NEXT: cmovll %edx, %eax ; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst @@ -159,11 +159,11 @@ define i32 @test_add_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sle: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovgl %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst @@ -175,11 +175,11 @@ define i32 @test_add_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_1_cmov_sgt: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: cmovlel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: lock xaddq %rcx, (%rdi) +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: cmovlel %edx, %eax ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst Index: test/CodeGen/X86/atomic128.ll =================================================================== --- test/CodeGen/X86/atomic128.ll +++ test/CodeGen/X86/atomic128.ll @@ -12,10 +12,9 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 +; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq Index: test/CodeGen/X86/avg.ll =================================================================== --- test/CodeGen/X86/avg.ll +++ test/CodeGen/X86/avg.ll @@ -1638,6 +1638,7 @@ define <512 x i8> @avg_v512i8_3(<512 x i8> %a, <512 x i8> %b) nounwind { ; SSE2-LABEL: avg_v512i8_3: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: movdqa %xmm8, 496(%rdi) @@ -1726,7 +1727,6 @@ ; SSE2-NEXT: movdqa %xmm1, 16(%rdi) ; SSE2-NEXT: pavgb {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: movdqa %xmm0, (%rdi) -; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: retq ; ; AVX1-LABEL: avg_v512i8_3: @@ -1735,6 +1735,7 @@ ; AVX1-NEXT: movq %rsp, %rbp ; AVX1-NEXT: andq $-32, %rsp ; AVX1-NEXT: subq $128, %rsp +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vmovdqa 144(%rbp), %ymm8 ; AVX1-NEXT: vmovdqa 112(%rbp), %ymm9 ; AVX1-NEXT: vmovdqa 80(%rbp), %ymm10 @@ -1861,7 +1862,6 @@ ; AVX1-NEXT: vmovaps %ymm0, 32(%rdi) ; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; AVX1-NEXT: vmovaps %ymm0, (%rdi) -; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: movq %rbp, %rsp ; AVX1-NEXT: popq %rbp ; AVX1-NEXT: vzeroupper @@ -1873,6 +1873,7 @@ ; AVX2-NEXT: movq %rsp, %rbp ; AVX2-NEXT: andq $-32, %rsp ; AVX2-NEXT: subq $32, %rsp +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vmovdqa 240(%rbp), %ymm8 ; AVX2-NEXT: vmovdqa 208(%rbp), %ymm9 ; AVX2-NEXT: vmovdqa 176(%rbp), %ymm10 @@ -1913,7 +1914,6 @@ ; AVX2-NEXT: vmovdqa %ymm2, 64(%rdi) ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdi) ; AVX2-NEXT: vmovdqa %ymm0, (%rdi) -; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: movq %rbp, %rsp ; AVX2-NEXT: popq %rbp ; AVX2-NEXT: vzeroupper @@ -1925,6 +1925,7 @@ ; AVX512F-NEXT: movq %rsp, %rbp ; AVX512F-NEXT: andq $-32, %rsp ; AVX512F-NEXT: subq $32, %rsp +; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vmovdqa 240(%rbp), %ymm8 ; AVX512F-NEXT: vmovdqa 208(%rbp), %ymm9 ; AVX512F-NEXT: vmovdqa 176(%rbp), %ymm10 @@ -1965,7 +1966,6 @@ ; AVX512F-NEXT: vmovdqa %ymm2, 64(%rdi) ; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdi) ; AVX512F-NEXT: vmovdqa %ymm0, (%rdi) -; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: movq %rbp, %rsp ; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper @@ -1977,6 +1977,7 @@ ; AVX512BW-NEXT: movq %rsp, %rbp ; AVX512BW-NEXT: andq $-64, %rsp ; AVX512BW-NEXT: subq $64, %rsp +; AVX512BW-NEXT: movq %rdi, %rax ; AVX512BW-NEXT: vpavgb 16(%rbp), %zmm0, %zmm0 ; AVX512BW-NEXT: vpavgb 80(%rbp), %zmm1, %zmm1 ; AVX512BW-NEXT: vpavgb 144(%rbp), %zmm2, %zmm2 @@ -1993,7 +1994,6 @@ ; AVX512BW-NEXT: vmovdqa64 %zmm2, 128(%rdi) ; AVX512BW-NEXT: vmovdqa64 %zmm1, 64(%rdi) ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdi) -; AVX512BW-NEXT: movq %rdi, %rax ; AVX512BW-NEXT: movq %rbp, %rsp ; AVX512BW-NEXT: popq %rbp ; AVX512BW-NEXT: vzeroupper Index: test/CodeGen/X86/avoid-sfb.ll =================================================================== --- test/CodeGen/X86/avoid-sfb.ll +++ test/CodeGen/X86/avoid-sfb.ll @@ -727,28 +727,29 @@ define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 { ; CHECK-LABEL: test_stack: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: movups %xmm0, (%rdi) -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq %rax, 16(%rdi) -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: movl %eax, 24(%rdi) -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: movl %eax, 28(%rdi) -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: movq %rcx, 16(%rdi) +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: movl %ecx, 24(%rdi) ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: movl %ecx, 28(%rdi) +; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movl %edx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; ; DISABLED-LABEL: test_stack: ; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: movq %rdi, %rax ; DISABLED-NEXT: movl %esi, {{[0-9]+}}(%rsp) ; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; DISABLED-NEXT: movups %xmm0, (%rdi) @@ -758,51 +759,50 @@ ; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 ; DISABLED-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; DISABLED-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; DISABLED-NEXT: movq %rdi, %rax ; DISABLED-NEXT: retq ; ; CHECK-AVX2-LABEL: test_stack: ; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: movq %rdi, %rax ; CHECK-AVX2-NEXT: movl %esi, {{[0-9]+}}(%rsp) ; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi) -; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-AVX2-NEXT: movq %rax, 16(%rdi) -; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX2-NEXT: movl %eax, 24(%rdi) -; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX2-NEXT: movl %eax, 28(%rdi) +; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-AVX2-NEXT: movq %rcx, 16(%rdi) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 24(%rdi) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 28(%rdi) ; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 ; CHECK-AVX2-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp) -; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-AVX2-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX2-NEXT: movl %eax, {{[0-9]+}}(%rsp) -; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX2-NEXT: movl %eax, {{[0-9]+}}(%rsp) -; CHECK-AVX2-NEXT: movq %rdi, %rax +; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-AVX2-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, {{[0-9]+}}(%rsp) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, {{[0-9]+}}(%rsp) ; CHECK-AVX2-NEXT: retq ; ; CHECK-AVX512-LABEL: test_stack: ; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: movq %rdi, %rax ; CHECK-AVX512-NEXT: movl %esi, {{[0-9]+}}(%rsp) ; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi) -; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-AVX512-NEXT: movq %rax, 16(%rdi) -; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX512-NEXT: movl %eax, 24(%rdi) -; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX512-NEXT: movl %eax, 28(%rdi) +; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-AVX512-NEXT: movq %rcx, 16(%rdi) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 24(%rdi) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 28(%rdi) ; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 ; CHECK-AVX512-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp) -; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX512-NEXT: movl %eax, {{[0-9]+}}(%rsp) -; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-AVX512-NEXT: movl %eax, {{[0-9]+}}(%rsp) -; CHECK-AVX512-NEXT: movq %rdi, %rax +; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-AVX512-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, {{[0-9]+}}(%rsp) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, {{[0-9]+}}(%rsp) ; CHECK-AVX512-NEXT: retq entry: %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8* Index: test/CodeGen/X86/avx-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx-intel-ocl.ll +++ test/CodeGen/X86/avx-intel-ocl.ll @@ -122,8 +122,8 @@ ; pass parameters in registers for 64-bit platform ; X64-LABEL: test_int -; X64: leal {{.*}}, %edi ; X64: movl {{.*}}, %esi +; X64: leal {{.*}}, %edi ; X64: call ; X64: addl {{.*}}, %eax define i32 @test_int(i32 %a, i32 %b) nounwind { Index: test/CodeGen/X86/avx-vinsertf128.ll =================================================================== --- test/CodeGen/X86/avx-vinsertf128.ll +++ test/CodeGen/X86/avx-vinsertf128.ll @@ -75,8 +75,7 @@ define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: insert_undef_pd: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0) ret <4 x double> %res @@ -86,8 +85,7 @@ define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: insert_undef_ps: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0) ret <8 x float> %res @@ -97,8 +95,7 @@ define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: insert_undef_si: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0) ret <8 x i32> %res Index: test/CodeGen/X86/avx512-arith.ll =================================================================== --- test/CodeGen/X86/avx512-arith.ll +++ test/CodeGen/X86/avx512-arith.ll @@ -904,9 +904,9 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_broadcast_vaddpd: ; CHECK: # %bb.0: -; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1 -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 +; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1 +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq double* %j, <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -272,9 +272,9 @@ define i32 @test10(i32 %a, i32 %b, i1 %cond) { ; ALL_X64-LABEL: test10: ; ALL_X64: ## %bb.0: -; ALL_X64-NEXT: testb $1, %dl -; ALL_X64-NEXT: cmovel %esi, %edi ; ALL_X64-NEXT: movl %edi, %eax +; ALL_X64-NEXT: testb $1, %dl +; ALL_X64-NEXT: cmovel %esi, %eax ; ALL_X64-NEXT: retq ; ; KNL_X32-LABEL: test10: Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -195,21 +195,21 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test12: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi ; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test12: ; SKX: ## %bb.0: -; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi ; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0 +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <16 x i64> %a, %b @@ -257,23 +257,23 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { ; KNL-LABEL: test14: ; KNL: ## %bb.0: +; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; KNL-NEXT: kshiftrw $4, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $1, %al -; KNL-NEXT: cmoveq %rsi, %rdi -; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: testb $1, %cl +; KNL-NEXT: cmoveq %rsi, %rax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test14: ; SKX: ## %bb.0: +; SKX-NEXT: movq %rdi, %rax ; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ; SKX-NEXT: kshiftrw $4, %k0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: cmoveq %rsi, %rdi -; SKX-NEXT: movq %rdi, %rax +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: cmoveq %rsi, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %cmpvector_func.i = icmp slt <8 x i64> %a, %b Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -5853,9 +5853,10 @@ ; ; X64-LABEL: test_kand: ; X64: ## %bb.0: -; X64-NEXT: andl %esi, %edi ## encoding: [0x21,0xf7] -; X64-NEXT: andl $8, %edi ## encoding: [0x83,0xe7,0x08] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: andl %esi, %eax ## encoding: [0x21,0xf0] +; X64-NEXT: andl $8, %eax ## encoding: [0x83,0xe0,0x08] +; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ## encoding: [0xc3] %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) @@ -5875,9 +5876,10 @@ ; ; X64-LABEL: test_kandn: ; X64: ## %bb.0: -; X64-NEXT: orl $-9, %edi ## encoding: [0x83,0xcf,0xf7] -; X64-NEXT: andl %esi, %edi ## encoding: [0x21,0xf7] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: orl $-9, %eax ## encoding: [0x83,0xc8,0xf7] +; X64-NEXT: andl %esi, %eax ## encoding: [0x21,0xf0] +; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ## encoding: [0xc3] %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1) @@ -5895,8 +5897,9 @@ ; ; X64-LABEL: test_knot: ; X64: ## %bb.0: -; X64-NEXT: notl %edi ## encoding: [0xf7,0xd7] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: notl %eax ## encoding: [0xf7,0xd0] +; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) ret i16 %res @@ -5914,9 +5917,10 @@ ; ; X64-LABEL: test_kor: ; X64: ## %bb.0: -; X64-NEXT: orl %esi, %edi ## encoding: [0x09,0xf7] -; X64-NEXT: orl $8, %edi ## encoding: [0x83,0xcf,0x08] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: orl %esi, %eax ## encoding: [0x09,0xf0] +; X64-NEXT: orl $8, %eax ## encoding: [0x83,0xc8,0x08] +; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ## encoding: [0xc3] %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1) @@ -5937,9 +5941,10 @@ ; ; X64-LABEL: test_kxnor: ; X64: ## %bb.0: -; X64-NEXT: xorl %esi, %edi ## encoding: [0x31,0xf7] -; X64-NEXT: xorl $8, %edi ## encoding: [0x83,0xf7,0x08] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: xorl %esi, %eax ## encoding: [0x31,0xf0] +; X64-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08] +; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ## encoding: [0xc3] %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1) @@ -5958,9 +5963,10 @@ ; ; X64-LABEL: test_kxor: ; X64: ## %bb.0: -; X64-NEXT: xorl %esi, %edi ## encoding: [0x31,0xf7] -; X64-NEXT: xorl $8, %edi ## encoding: [0x83,0xf7,0x08] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: xorl %esi, %eax ## encoding: [0x31,0xf0] +; X64-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08] +; X64-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ## encoding: [0xc3] %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8) %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1) Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -9,8 +9,9 @@ define i16 @mask16(i16 %x) { ; CHECK-LABEL: mask16: ; CHECK: ## %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq ; ; X86-LABEL: mask16: @@ -47,8 +48,9 @@ define i8 @mask8(i8 %x) { ; CHECK-LABEL: mask8: ; CHECK: ## %bb.0: -; CHECK-NEXT: notb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notb %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq ; ; X86-LABEL: mask8: @@ -149,10 +151,11 @@ ; CHECK-LABEL: mand16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq ; ; X86-LABEL: mand16: Index: test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -871,7 +871,7 @@ ; X32-LABEL: testf32_inp: ; X32: # %bb.0: ; X32-NEXT: subl $44, %esp -; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm6, (%esp) # 16-byte Spill ; X32-NEXT: vaddps %zmm2, %zmm0, %zmm6 ; X32-NEXT: vaddps %zmm3, %zmm1, %zmm7 @@ -882,7 +882,7 @@ ; X32-NEXT: vaddps %zmm4, %zmm0, %zmm0 ; X32-NEXT: vaddps %zmm5, %zmm1, %zmm1 ; X32-NEXT: vmovups (%esp), %xmm6 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload ; X32-NEXT: addl $44, %esp ; X32-NEXT: retl ; @@ -923,48 +923,47 @@ ; X32-NEXT: pushl %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: subl $20, %esp -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %esi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: subl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: imull %ebp, %edx -; X32-NEXT: subl %esi, %ebx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl %ecx, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ecx +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: imull %ebp, %ebx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: subl %edi, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: imull %ebp, %ecx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %ebp ; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax ; X32-NEXT: imull %ebp, %eax -; X32-NEXT: addl %eax, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi -; X32-NEXT: imull %eax, %edi ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: imull %eax, %esi +; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: imull %ebp, %edx +; X32-NEXT: addl %esi, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl %edx, %eax +; X32-NEXT: imull %edi, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: addl $20, %esp ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -947,16 +947,16 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [10:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_mask_broadcast_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] +; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -6669,14 +6669,16 @@ define i16 @mask16(i16 %x) { ; GENERIC-LABEL: mask16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: notl %edi # sched: [1:0.33] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: notl %eax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mask16: ; SKX: # %bb.0: -; SKX-NEXT: notl %edi # sched: [1:0.25] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: notl %eax # sched: [1:0.25] +; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq # sched: [7:1.00] %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, @@ -6706,14 +6708,16 @@ define i8 @mask8(i8 %x) { ; GENERIC-LABEL: mask8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: notb %dil # sched: [1:0.33] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: notb %al # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $al killed $al killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mask8: ; SKX: # %bb.0: -; SKX-NEXT: notb %dil # sched: [1:0.25] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: notb %al # sched: [1:0.25] +; SKX-NEXT: # kill: def $al killed $al killed $eax ; SKX-NEXT: retq # sched: [7:1.00] %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, @@ -6788,19 +6792,21 @@ ; GENERIC-LABEL: mand16: ; GENERIC: # %bb.0: ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33] -; GENERIC-NEXT: andl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: orl %eax, %edi # sched: [1:0.33] -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: xorl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: andl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mand16: ; SKX: # %bb.0: ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25] -; SKX-NEXT: andl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: orl %eax, %edi # sched: [1:0.25] -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] +; SKX-NEXT: xorl %esi, %ecx # sched: [1:0.25] +; SKX-NEXT: andl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq # sched: [7:1.00] %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> Index: test/CodeGen/X86/avx512-select.ll =================================================================== --- test/CodeGen/X86/avx512-select.ll +++ test/CodeGen/X86/avx512-select.ll @@ -135,8 +135,9 @@ ; ; X64-LABEL: select05: ; X64: # %bb.0: -; X64-NEXT: orl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> @@ -185,8 +186,9 @@ ; ; X64-LABEL: select06: ; X64: # %bb.0: -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> Index: test/CodeGen/X86/avx512bw-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512bw-mask-op.ll +++ test/CodeGen/X86/avx512bw-mask-op.ll @@ -4,8 +4,8 @@ define i32 @mask32(i32 %x) { ; CHECK-LABEL: mask32: ; CHECK: ## %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax ; CHECK-NEXT: retq %m0 = bitcast i32 %x to <32 x i1> %m1 = xor <32 x i1> %m0, %m1 = xor <64 x i1> %m0, %mb = bitcast i32 %y to <32 x i1> @@ -116,10 +116,10 @@ ; CHECK-LABEL: mand64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: andq %rsi, %rax -; CHECK-NEXT: xorq %rsi, %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: xorq %rsi, %rax +; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq %ma = bitcast i64 %x to <64 x i1> %mb = bitcast i64 %y to <64 x i1> Index: test/CodeGen/X86/avx512dq-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512dq-mask-op.ll +++ test/CodeGen/X86/avx512dq-mask-op.ll @@ -4,8 +4,9 @@ define i8 @mask8(i8 %x) { ; CHECK-LABEL: mask8: ; CHECK: ## %bb.0: -; CHECK-NEXT: notb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notb %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, @@ -32,10 +33,11 @@ ; CHECK-LABEL: mand8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %ma = bitcast i8 %x to <8 x i1> %mb = bitcast i8 %y to <8 x i1> Index: test/CodeGen/X86/avx512vl-arith.ll =================================================================== --- test/CodeGen/X86/avx512vl-arith.ll +++ test/CodeGen/X86/avx512vl-arith.ll @@ -408,9 +408,9 @@ define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_256: ; CHECK: ## %bb.0: -; CHECK-NEXT: vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca] -; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm1, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x0f] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1] +; CHECK-NEXT: vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca] +; CHECK-NEXT: vaddpd (%rdi){1to4}, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -835,9 +835,9 @@ define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, double* %j, <2 x i64> %mask1) nounwind { ; CHECK-LABEL: test_mask_broadcast_vaddpd_128: ; CHECK: ## %bb.0: -; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca] -; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm1, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x0f] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] +; CHECK-NEXT: vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca] +; CHECK-NEXT: vaddpd (%rdi){1to2}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %tmp = load double, double* %j Index: test/CodeGen/X86/bigstructret.ll =================================================================== --- test/CodeGen/X86/bigstructret.ll +++ test/CodeGen/X86/bigstructret.ll @@ -8,20 +8,20 @@ define fastcc %0 @ReturnBigStruct() nounwind readnone { ; X86-LABEL: ReturnBigStruct: ; X86: # %bb.0: # %entry +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl $24601, 12(%ecx) # imm = 0x6019 ; X86-NEXT: movl $48, 8(%ecx) ; X86-NEXT: movl $24, 4(%ecx) ; X86-NEXT: movl $12, (%ecx) -; X86-NEXT: movl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $105660490448944, %rax # imm = 0x601900000030 -; X64-NEXT: movq %rax, 8(%rdi) -; X64-NEXT: movabsq $103079215116, %rax # imm = 0x180000000C -; X64-NEXT: movq %rax, (%rdi) ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $105660490448944, %rcx # imm = 0x601900000030 +; X64-NEXT: movq %rcx, 8(%rdi) +; X64-NEXT: movabsq $103079215116, %rcx # imm = 0x180000000C +; X64-NEXT: movq %rcx, (%rdi) ; X64-NEXT: retq entry: %0 = insertvalue %0 zeroinitializer, i32 12, 0 @@ -35,18 +35,18 @@ define fastcc %1 @ReturnBigStruct2() nounwind readnone { ; X86-LABEL: ReturnBigStruct2: ; X86: # %bb.0: # %entry +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl $48, 4(%ecx) ; X86-NEXT: movb $1, 2(%ecx) ; X86-NEXT: movw $256, (%ecx) # imm = 0x100 -; X86-NEXT: movl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: ReturnBigStruct2: ; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movl $48, 4(%rdi) ; X64-NEXT: movb $1, 2(%rdi) ; X64-NEXT: movw $256, (%rdi) # imm = 0x100 -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq entry: %0 = insertvalue %1 zeroinitializer, i1 false, 0 Index: test/CodeGen/X86/bitcast-i256.ll =================================================================== --- test/CodeGen/X86/bitcast-i256.ll +++ test/CodeGen/X86/bitcast-i256.ll @@ -5,16 +5,16 @@ define i256 @foo(<8 x i32> %a) { ; FAST-LABEL: foo: ; FAST: # %bb.0: -; FAST-NEXT: vmovups %ymm0, (%rdi) ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: vmovups %ymm0, (%rdi) ; FAST-NEXT: vzeroupper ; FAST-NEXT: retq ; ; SLOW-LABEL: foo: ; SLOW: # %bb.0: +; SLOW-NEXT: movq %rdi, %rax ; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi) ; SLOW-NEXT: vmovups %xmm0, (%rdi) -; SLOW-NEXT: movq %rdi, %rax ; SLOW-NEXT: vzeroupper ; SLOW-NEXT: retq %r = bitcast <8 x i32> %a to i256 Index: test/CodeGen/X86/bitcast-int-to-vector-bool.ll =================================================================== --- test/CodeGen/X86/bitcast-int-to-vector-bool.ll +++ test/CodeGen/X86/bitcast-int-to-vector-bool.ll @@ -193,8 +193,8 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i32_32i1: ; SSE2-SSSE3: # %bb.0: -; SSE2-SSSE3-NEXT: movl %esi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movl %esi, (%rdi) ; SSE2-SSSE3-NEXT: retq ; ; AVX1-LABEL: bitcast_i32_32i1: @@ -250,14 +250,14 @@ define <64 x i1> @bitcast_i64_64i1(i64 %a0) { ; SSE2-SSSE3-LABEL: bitcast_i64_64i1: ; SSE2-SSSE3: # %bb.0: -; SSE2-SSSE3-NEXT: movq %rsi, (%rdi) ; SSE2-SSSE3-NEXT: movq %rdi, %rax +; SSE2-SSSE3-NEXT: movq %rsi, (%rdi) ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: bitcast_i64_64i1: ; AVX12: # %bb.0: -; AVX12-NEXT: movq %rsi, (%rdi) ; AVX12-NEXT: movq %rdi, %rax +; AVX12-NEXT: movq %rsi, (%rdi) ; AVX12-NEXT: retq ; ; AVX512-LABEL: bitcast_i64_64i1: Index: test/CodeGen/X86/bitreverse.ll =================================================================== --- test/CodeGen/X86/bitreverse.ll +++ test/CodeGen/X86/bitreverse.ll @@ -341,20 +341,21 @@ ; ; X64-LABEL: test_bitreverse_i8: ; X64: # %bb.0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $85, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-86, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $85, %cl +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-86, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -384,21 +385,22 @@ ; ; X64-LABEL: test_bitreverse_i4: ; X64: # %bb.0: -; X64-NEXT: rolb $4, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $51, %al -; X64-NEXT: shlb $2, %al -; X64-NEXT: andb $-52, %dil -; X64-NEXT: shrb $2, %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $80, %al -; X64-NEXT: addb %al, %al -; X64-NEXT: andb $-96, %dil -; X64-NEXT: shrb %dil -; X64-NEXT: orb %al, %dil -; X64-NEXT: shrb $4, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $4, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $51, %cl +; X64-NEXT: shlb $2, %cl +; X64-NEXT: andb $-52, %al +; X64-NEXT: shrb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $80, %cl +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $-96, %al +; X64-NEXT: shrb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: shrb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b @@ -474,6 +476,7 @@ ; X64-LABEL: identity_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) %c = call i8 @llvm.bitreverse.i8(i8 %b) Index: test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll @@ -10,9 +10,9 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test__andn_u64: ; X64: # %bb.0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 @@ -84,9 +84,9 @@ define i64 @test_andn_u64(i64 %a0, i64 %a1) { ; X64-LABEL: test_andn_u64: ; X64: # %bb.0: -; X64-NEXT: xorq $-1, %rdi -; X64-NEXT: andq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq $-1, %rax +; X64-NEXT: andq %rsi, %rax ; X64-NEXT: retq %xor = xor i64 %a0, -1 %res = and i64 %xor, %a1 Index: test/CodeGen/X86/bmi-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -47,9 +47,9 @@ ; ; X64-LABEL: test__andn_u32: ; X64: # %bb.0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 @@ -199,9 +199,9 @@ ; ; X64-LABEL: test_andn_u32: ; X64: # %bb.0: -; X64-NEXT: xorl $-1, %edi -; X64-NEXT: andl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $-1, %eax +; X64-NEXT: andl %esi, %eax ; X64-NEXT: retq %xor = xor i32 %a0, -1 %res = and i32 %xor, %a1 Index: test/CodeGen/X86/bmi.ll =================================================================== --- test/CodeGen/X86/bmi.ll +++ test/CodeGen/X86/bmi.ll @@ -421,9 +421,9 @@ ; ; X64-LABEL: non_bextr32: ; X64: # %bb.0: # %entry -; X64-NEXT: shrl $2, %edi -; X64-NEXT: andl $111, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $2, %eax +; X64-NEXT: andl $111, %eax ; X64-NEXT: retq entry: %shr = lshr i32 %x, 2 Index: test/CodeGen/X86/bmi2.ll =================================================================== --- test/CodeGen/X86/bmi2.ll +++ test/CodeGen/X86/bmi2.ll @@ -130,15 +130,15 @@ ; ; X64-LABEL: mulx32: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: movl %esi, %eax ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: addl %edi, %edi -; X64-NEXT: addl %esi, %esi -; X64-NEXT: imulq %rdi, %rsi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: shrq $32, %rax -; X64-NEXT: movl %eax, (%rdx) -; X64-NEXT: movl %esi, %eax +; X64-NEXT: addl %eax, %eax +; X64-NEXT: imulq %rdi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: shrq $32, %rcx +; X64-NEXT: movl %ecx, (%rdx) +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %x1 = add i32 %x, %x %y1 = add i32 %y, %y @@ -165,14 +165,14 @@ ; ; X64-LABEL: mulx32_load: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: addl %edi, %edi -; X64-NEXT: movl (%rsi), %eax -; X64-NEXT: imulq %rax, %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $32, %rax -; X64-NEXT: movl %eax, (%rdx) ; X64-NEXT: movl %edi, %eax +; X64-NEXT: addl %eax, %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: imulq %rcx, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: shrq $32, %rcx +; X64-NEXT: movl %ecx, (%rdx) +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %x1 = add i32 %x, %x %y1 = load i32, i32* %y Index: test/CodeGen/X86/bool-math.ll =================================================================== --- test/CodeGen/X86/bool-math.ll +++ test/CodeGen/X86/bool-math.ll @@ -32,9 +32,10 @@ define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) { ; CHECK-LABEL: sub_zext_cmp_mask_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: orb $46, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: orb $46, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -46,9 +47,10 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: xorb $27, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: xorb $27, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -60,9 +62,9 @@ define i32 @add_zext_cmp_mask_wider_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: xorl $27, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: xorl $27, %eax ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -74,9 +76,10 @@ define i8 @add_zext_cmp_mask_narrower_result(i32 %x) { ; CHECK-LABEL: add_zext_cmp_mask_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: xorb $43, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: xorb $43, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -128,9 +131,10 @@ define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: xorb $-29, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: xorb $-29, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -141,9 +145,9 @@ define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: xorl $227, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: xorl $227, %eax ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -154,9 +158,10 @@ define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: xorb $41, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: xorb $41, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = and i16 %x, 1 %c = icmp eq i16 %a, 0 Index: test/CodeGen/X86/bool-simplify.ll =================================================================== --- test/CodeGen/X86/bool-simplify.ll +++ test/CodeGen/X86/bool-simplify.ll @@ -4,9 +4,9 @@ define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: ptest %xmm0, %xmm0 -; CHECK-NEXT: cmovnel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ptest %xmm0, %xmm0 +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) %t2 = icmp ne i32 %t1, 0 Index: test/CodeGen/X86/bswap-rotate.ll =================================================================== --- test/CodeGen/X86/bswap-rotate.ll +++ test/CodeGen/X86/bswap-rotate.ll @@ -14,8 +14,9 @@ ; ; X64-LABEL: combine_bswap_rotate: ; X64: # %bb.0: -; X64-NEXT: rolw $9, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $9, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %1 = call i16 @llvm.bswap.i16(i16 %a0) %2 = shl i16 %1, 1 Index: test/CodeGen/X86/bswap-wide-int.ll =================================================================== --- test/CodeGen/X86/bswap-wide-int.ll +++ test/CodeGen/X86/bswap-wide-int.ll @@ -25,14 +25,14 @@ ; ; X64-LABEL: bswap_i64: ; X64: # %bb.0: -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: bswapq %rax ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i64: ; X64-MOVBE: # %bb.0: -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rax +; X64-MOVBE-NEXT: bswapq %rax ; X64-MOVBE-NEXT: retq %1 = call i64 @llvm.bswap.i64(i64 %a0) ret i64 %1 @@ -79,17 +79,17 @@ ; ; X64-LABEL: bswap_i128: ; X64: # %bb.0: -; X64-NEXT: bswapq %rsi -; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: bswapq %rdi ; X64-NEXT: movq %rdi, %rdx ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i128: ; X64-MOVBE: # %bb.0: -; X64-MOVBE-NEXT: bswapq %rsi -; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rsi, %rax +; X64-MOVBE-NEXT: bswapq %rax +; X64-MOVBE-NEXT: bswapq %rdi ; X64-MOVBE-NEXT: movq %rdi, %rdx ; X64-MOVBE-NEXT: retq %1 = call i128 @llvm.bswap.i128(i128 %a0) @@ -149,6 +149,7 @@ ; ; X64-LABEL: bswap_i256: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: bswapq %r8 ; X64-NEXT: bswapq %rcx ; X64-NEXT: bswapq %rdx @@ -157,16 +158,15 @@ ; X64-NEXT: movq %rdx, 16(%rdi) ; X64-NEXT: movq %rcx, 8(%rdi) ; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq ; ; X64-MOVBE-LABEL: bswap_i256: ; X64-MOVBE: # %bb.0: +; X64-MOVBE-NEXT: movq %rdi, %rax ; X64-MOVBE-NEXT: movbeq %rsi, 24(%rdi) ; X64-MOVBE-NEXT: movbeq %rdx, 16(%rdi) ; X64-MOVBE-NEXT: movbeq %rcx, 8(%rdi) ; X64-MOVBE-NEXT: movbeq %r8, (%rdi) -; X64-MOVBE-NEXT: movq %rdi, %rax ; X64-MOVBE-NEXT: retq %1 = call i256 @llvm.bswap.i256(i256 %a0) ret i256 %1 Index: test/CodeGen/X86/bswap.ll =================================================================== --- test/CodeGen/X86/bswap.ll +++ test/CodeGen/X86/bswap.ll @@ -19,8 +19,9 @@ ; ; CHECK64-LABEL: W: ; CHECK64: # %bb.0: -; CHECK64-NEXT: rolw $8, %di ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: rolw $8, %ax +; CHECK64-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK64-NEXT: retq %Z = call i16 @llvm.bswap.i16( i16 %A ) ; [#uses=1] ret i16 %Z @@ -35,8 +36,8 @@ ; ; CHECK64-LABEL: X: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapl %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax ; CHECK64-NEXT: retq %Z = call i32 @llvm.bswap.i32( i32 %A ) ; [#uses=1] ret i32 %Z @@ -53,8 +54,8 @@ ; ; CHECK64-LABEL: Y: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapq %rdi ; CHECK64-NEXT: movq %rdi, %rax +; CHECK64-NEXT: bswapq %rax ; CHECK64-NEXT: retq %Z = call i64 @llvm.bswap.i64( i64 %A ) ; [#uses=1] ret i64 %Z @@ -71,9 +72,9 @@ ; ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: shrl $16, %eax ; CHECK64-NEXT: retq entry: @@ -95,9 +96,9 @@ ; ; CHECK64-LABEL: test2: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: sarl $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: sarl $16, %eax ; CHECK64-NEXT: retq entry: Index: test/CodeGen/X86/bswap_tree.ll =================================================================== --- test/CodeGen/X86/bswap_tree.ll +++ test/CodeGen/X86/bswap_tree.ll @@ -20,9 +20,9 @@ ; ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 @@ -53,9 +53,9 @@ ; ; CHECK64-LABEL: test2: ; CHECK64: # %bb.0: -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: roll $16, %edi ; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte1 = shl i32 %x, 8 %byte0 = lshr i32 %x, 8 Index: test/CodeGen/X86/bswap_tree2.ll =================================================================== --- test/CodeGen/X86/bswap_tree2.ll +++ test/CodeGen/X86/bswap_tree2.ll @@ -25,16 +25,16 @@ ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: ; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 ; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: orl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %eax -; CHECK64-NEXT: shrl $8, %ecx -; CHECK64-NEXT: orl %eax, %ecx -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %ecx, %edi -; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000 +; CHECK64-NEXT: movl %edi, %edx +; CHECK64-NEXT: orl $-16777216, %edx # imm = 0xFF000000 +; CHECK64-NEXT: shll $8, %ecx +; CHECK64-NEXT: shrl $8, %edx +; CHECK64-NEXT: orl %ecx, %edx +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: shrl $16, %eax +; CHECK64-NEXT: orl %edx, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 Index: test/CodeGen/X86/bt.ll =================================================================== --- test/CodeGen/X86/bt.ll +++ test/CodeGen/X86/bt.ll @@ -1112,16 +1112,16 @@ ; ; X64-LABEL: demanded_i32: ; X64: # %bb.0: +; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %edx, %eax ; X64-NEXT: shrl $5, %eax -; X64-NEXT: movl (%rdi,%rax,4), %r8d -; X64-NEXT: movl $1, %edi -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: btl %edx, %r8d +; X64-NEXT: movl (%rdi,%rax,4), %edi +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: btl %ecx, %edi ; X64-NEXT: jae .LBB30_2 ; X64-NEXT: # %bb.1: -; X64-NEXT: orl %edi, (%rsi,%rax,4) +; X64-NEXT: orl %edx, (%rsi,%rax,4) ; X64-NEXT: .LBB30_2: ; X64-NEXT: retq %4 = lshr i32 %2, 5 Index: test/CodeGen/X86/btc_bts_btr.ll =================================================================== --- test/CodeGen/X86/btc_bts_btr.ll +++ test/CodeGen/X86/btc_bts_btr.ll @@ -6,8 +6,9 @@ define i16 @btr_16(i16 %x, i16 %n) { ; X64-LABEL: btr_16: ; X64: # %bb.0: -; X64-NEXT: movw $-2, %ax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movw $-2, %ax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolw %cl, %ax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -28,8 +29,9 @@ define i16 @bts_16(i16 %x, i16 %n) { ; X64-LABEL: bts_16: ; X64: # %bb.0: -; X64-NEXT: btsl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btsl %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X86-LABEL: bts_16: @@ -48,8 +50,9 @@ define i16 @btc_16(i16 %x, i16 %n) { ; X64-LABEL: btc_16: ; X64: # %bb.0: -; X64-NEXT: btcl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btcl %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X86-LABEL: btc_16: @@ -68,8 +71,8 @@ define i32 @btr_32(i32 %x, i32 %n) { ; X64-LABEL: btr_32: ; X64: # %bb.0: -; X64-NEXT: btrl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btrl %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: btr_32: @@ -87,8 +90,8 @@ define i32 @bts_32(i32 %x, i32 %n) { ; X64-LABEL: bts_32: ; X64: # %bb.0: -; X64-NEXT: btsl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btsl %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: bts_32: @@ -105,8 +108,8 @@ define i32 @btc_32(i32 %x, i32 %n) { ; X64-LABEL: btc_32: ; X64: # %bb.0: -; X64-NEXT: btcl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btcl %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: btc_32: @@ -123,8 +126,8 @@ define i64 @btr_64(i64 %x, i64 %n) { ; X64-LABEL: btr_64: ; X64: # %bb.0: -; X64-NEXT: btrq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: btrq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: btr_64: @@ -154,8 +157,8 @@ define i64 @bts_64(i64 %x, i64 %n) { ; X64-LABEL: bts_64: ; X64: # %bb.0: -; X64-NEXT: btsq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: btsq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: bts_64: @@ -182,8 +185,8 @@ define i64 @btc_64(i64 %x, i64 %n) { ; X64-LABEL: btc_64: ; X64: # %bb.0: -; X64-NEXT: btcq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: btcq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: btc_64: @@ -210,8 +213,9 @@ define i16 @btr_16_mask(i16 %x, i16 %n) { ; X64-LABEL: btr_16_mask: ; X64: # %bb.0: -; X64-NEXT: movw $-2, %ax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movw $-2, %ax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolw %cl, %ax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -233,9 +237,10 @@ define i16 @bts_16_mask(i16 %x, i16 %n) { ; X64-LABEL: bts_16_mask: ; X64: # %bb.0: -; X64-NEXT: andb $15, %sil -; X64-NEXT: btsl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $15, %sil +; X64-NEXT: btsl %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X86-LABEL: bts_16_mask: @@ -256,9 +261,10 @@ define i16 @btc_16_mask(i16 %x, i16 %n) { ; X64-LABEL: btc_16_mask: ; X64: # %bb.0: -; X64-NEXT: andb $15, %sil -; X64-NEXT: btcl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $15, %sil +; X64-NEXT: btcl %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X86-LABEL: btc_16_mask: @@ -279,8 +285,8 @@ define i32 @btr_32_mask(i32 %x, i32 %n) { ; X64-LABEL: btr_32_mask: ; X64: # %bb.0: -; X64-NEXT: btrl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btrl %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: btr_32_mask: @@ -299,8 +305,8 @@ define i32 @bts_32_mask(i32 %x, i32 %n) { ; X64-LABEL: bts_32_mask: ; X64: # %bb.0: -; X64-NEXT: btsl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btsl %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: bts_32_mask: @@ -318,8 +324,8 @@ define i32 @btc_32_mask(i32 %x, i32 %n) { ; X64-LABEL: btc_32_mask: ; X64: # %bb.0: -; X64-NEXT: btcl %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: btcl %esi, %eax ; X64-NEXT: retq ; ; X86-LABEL: btc_32_mask: @@ -337,8 +343,8 @@ define i64 @btr_64_mask(i64 %x, i64 %n) { ; X64-LABEL: btr_64_mask: ; X64: # %bb.0: -; X64-NEXT: btrq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: btrq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: btr_64_mask: @@ -369,8 +375,8 @@ define i64 @bts_64_mask(i64 %x, i64 %n) { ; X64-LABEL: bts_64_mask: ; X64: # %bb.0: -; X64-NEXT: btsq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: btsq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: bts_64_mask: @@ -398,8 +404,8 @@ define i64 @btc_64_mask(i64 %x, i64 %n) { ; X64-LABEL: btc_64_mask: ; X64: # %bb.0: -; X64-NEXT: btcq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: btcq %rsi, %rax ; X64-NEXT: retq ; ; X86-LABEL: btc_64_mask: @@ -450,8 +456,9 @@ define i16 @bts_16_load(i16* %x, i16 %n) { ; X64-LABEL: bts_16_load: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: orw (%rdi), %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -475,8 +482,9 @@ define i16 @btc_16_load(i16* %x, i16 %n) { ; X64-LABEL: btc_16_load: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: xorw (%rdi), %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -673,8 +681,9 @@ define void @btr_16_dont_fold(i16* %x, i16 %n) { ; X64-LABEL: btr_16_dont_fold: ; X64: # %bb.0: -; X64-NEXT: movw $-2, %ax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movw $-2, %ax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolw %cl, %ax ; X64-NEXT: andw %ax, (%rdi) ; X64-NEXT: retq @@ -698,8 +707,9 @@ define void @bts_16_dont_fold(i16* %x, i16 %n) { ; X64-LABEL: bts_16_dont_fold: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: orw %ax, (%rdi) ; X64-NEXT: retq @@ -722,8 +732,9 @@ define void @btc_16_dont_fold(i16* %x, i16 %n) { ; X64-LABEL: btc_16_dont_fold: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: xorw %ax, (%rdi) ; X64-NEXT: retq @@ -746,8 +757,9 @@ define void @btr_32_dont_fold(i32* %x, i32 %n) { ; X64-LABEL: btr_32_dont_fold: ; X64: # %bb.0: -; X64-NEXT: movl $-2, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $-2, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: roll %cl, %eax ; X64-NEXT: andl %eax, (%rdi) ; X64-NEXT: retq @@ -771,8 +783,9 @@ define void @bts_32_dont_fold(i32* %x, i32 %n) { ; X64-LABEL: bts_32_dont_fold: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: orl %eax, (%rdi) ; X64-NEXT: retq @@ -795,8 +808,9 @@ define void @btc_32_dont_fold(i32* %x, i32 %n) { ; X64-LABEL: btc_32_dont_fold: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: xorl %eax, (%rdi) ; X64-NEXT: retq @@ -819,8 +833,9 @@ define void @btr_64_dont_fold(i64* %x, i64 %n) { ; X64-LABEL: btr_64_dont_fold: ; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movq $-2, %rax -; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NEXT: rolq %cl, %rax ; X64-NEXT: andq %rax, (%rdi) ; X64-NEXT: retq @@ -860,8 +875,9 @@ define void @bts_64_dont_fold(i64* %x, i64 %n) { ; X64-LABEL: bts_64_dont_fold: ; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movl $1, %eax -; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NEXT: shlq %cl, %rax ; X64-NEXT: orq %rax, (%rdi) ; X64-NEXT: retq @@ -898,8 +914,9 @@ define void @btc_64_dont_fold(i64* %x, i64 %n) { ; X64-LABEL: btc_64_dont_fold: ; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movl $1, %eax -; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NEXT: shlq %cl, %rax ; X64-NEXT: xorq %rax, (%rdi) ; X64-NEXT: retq Index: test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-64.ll +++ test/CodeGen/X86/bypass-slow-division-64.ll @@ -8,17 +8,17 @@ ; CHECK-LABEL: Test_get_quotient: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-NEXT: retq @@ -30,21 +30,20 @@ ; CHECK-LABEL: Test_get_remainder: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB1_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: divl %esi -; CHECK-NEXT: # kill: def $edx killed $edx def $rdx -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %result = srem i64 %a, %b ret i64 %result @@ -54,18 +53,18 @@ ; CHECK-LABEL: Test_get_quotient_and_remainder: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: je .LBB2_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: cqto ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: # kill: def $eax killed $eax def $rax Index: test/CodeGen/X86/clear-highbits.ll =================================================================== --- test/CodeGen/X86/clear-highbits.ll +++ test/CodeGen/X86/clear-highbits.ll @@ -33,10 +33,11 @@ ; X64-LABEL: clear_highbits8_c0: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlb %cl, %dil -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = lshr i8 -1, %numhighbits %masked = and i8 %mask, %val @@ -79,10 +80,11 @@ ; X64-LABEL: clear_highbits8_c4_commutative: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlb %cl, %dil -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = lshr i8 -1, %numhighbits %masked = and i8 %val, %mask ; swapped order @@ -340,10 +342,10 @@ ; X64-NOBMI2-LABEL: clear_highbits32_c0: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shll %cl, %edi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi ; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_highbits32_c0: @@ -375,10 +377,10 @@ ; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shll %cl, %edi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi ; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_highbits32_c1_indexzext: @@ -488,10 +490,10 @@ ; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shll %cl, %edi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi ; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_highbits32_c4_commutative: @@ -545,10 +547,10 @@ ; X64-NOBMI2-LABEL: clear_highbits64_c0: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx -; X64-NOBMI2-NEXT: shlq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NOBMI2-NEXT: shrq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_highbits64_c0: @@ -598,10 +600,10 @@ ; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shlq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shrq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_highbits64_c1_indexzext: @@ -775,10 +777,10 @@ ; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx -; X64-NOBMI2-NEXT: shlq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NOBMI2-NEXT: shrq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_highbits64_c4_commutative: @@ -834,9 +836,10 @@ ; X64-NOBMI2-NEXT: pushq %rbp ; X64-NOBMI2-NEXT: pushq %rbx ; X64-NOBMI2-NEXT: pushq %rax +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %ebx ; X64-NOBMI2-NEXT: movl $-1, %ebp -; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shrl %cl, %ebp ; X64-NOBMI2-NEXT: movl %ebp, %edi ; X64-NOBMI2-NEXT: callq use32 @@ -934,9 +937,10 @@ ; X64-NOBMI2-NEXT: pushq %r14 ; X64-NOBMI2-NEXT: pushq %rbx ; X64-NOBMI2-NEXT: pushq %rax +; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %r14 ; X64-NOBMI2-NEXT: movq $-1, %rbx -; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shrq %cl, %rbx ; X64-NOBMI2-NEXT: movq %rbx, %rdi ; X64-NOBMI2-NEXT: callq use64 Index: test/CodeGen/X86/clear-lowbits.ll =================================================================== --- test/CodeGen/X86/clear-lowbits.ll +++ test/CodeGen/X86/clear-lowbits.ll @@ -35,10 +35,11 @@ ; X64-LABEL: clear_lowbits8_c0: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrb %cl, %dil -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrb %cl, %al +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = shl i8 -1, %numlowbits %masked = and i8 %mask, %val @@ -81,10 +82,11 @@ ; X64-LABEL: clear_lowbits8_c4_commutative: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrb %cl, %dil -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrb %cl, %al +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %mask = shl i8 -1, %numlowbits %masked = and i8 %val, %mask ; swapped order @@ -327,10 +329,10 @@ ; X64-NOBMI2-LABEL: clear_lowbits32_c0: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edi ; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shrl %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_c0: @@ -362,10 +364,10 @@ ; X64-NOBMI2-LABEL: clear_lowbits32_c1_indexzext: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edi ; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shrl %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_c1_indexzext: @@ -475,10 +477,10 @@ ; X64-NOBMI2-LABEL: clear_lowbits32_c4_commutative: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edi ; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shrl %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_c4_commutative: @@ -530,10 +532,10 @@ ; X64-NOBMI2-LABEL: clear_lowbits64_c0: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx -; X64-NOBMI2-NEXT: shrq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NOBMI2-NEXT: shlq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shrq %cl, %rax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_c0: @@ -583,10 +585,10 @@ ; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: shrq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shlq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shrq %cl, %rax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_c1_indexzext: @@ -760,10 +762,10 @@ ; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative: ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx -; X64-NOBMI2-NEXT: shrq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NOBMI2-NEXT: shlq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shrq %cl, %rax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_c4_commutative: @@ -794,11 +796,12 @@ ; ; X64-LABEL: clear_lowbits8_ic0: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movb $8, %cl ; X64-NEXT: subb %sil, %cl -; X64-NEXT: shrb %cl, %dil -; X64-NEXT: shlb %cl, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrb %cl, %al +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %numhighbits = sub i8 8, %numlowbits %mask = shl i8 -1, %numhighbits @@ -844,11 +847,12 @@ ; ; X64-LABEL: clear_lowbits8_ic4_commutative: ; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movb $8, %cl ; X64-NEXT: subb %sil, %cl -; X64-NEXT: shrb %cl, %dil -; X64-NEXT: shlb %cl, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrb %cl, %al +; X64-NEXT: shlb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %numhighbits = sub i8 8, %numlowbits %mask = shl i8 -1, %numhighbits @@ -1125,12 +1129,12 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic0: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %edi, %eax ; X64-NOBMI2-NEXT: movl $32, %ecx ; X64-NOBMI2-NEXT: subl %esi, %ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi +; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edi -; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic0: @@ -1166,11 +1170,11 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %edi, %eax ; X64-NOBMI2-NEXT: movb $32, %cl ; X64-NOBMI2-NEXT: subb %sil, %cl -; X64-NOBMI2-NEXT: shrl %cl, %edi -; X64-NOBMI2-NEXT: shll %cl, %edi -; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shrl %cl, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext: @@ -1297,12 +1301,12 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %edi, %eax ; X64-NOBMI2-NEXT: movl $32, %ecx ; X64-NOBMI2-NEXT: subl %esi, %ecx -; X64-NOBMI2-NEXT: shrl %cl, %edi +; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edi -; X64-NOBMI2-NEXT: movl %edi, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative: @@ -1358,12 +1362,12 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic0: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movq %rdi, %rax ; X64-NOBMI2-NEXT: movl $64, %ecx ; X64-NOBMI2-NEXT: subl %esi, %ecx -; X64-NOBMI2-NEXT: shrq %cl, %rdi +; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shlq %cl, %rdi -; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic0: @@ -1417,11 +1421,11 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movq %rdi, %rax ; X64-NOBMI2-NEXT: movb $64, %cl ; X64-NOBMI2-NEXT: subb %sil, %cl -; X64-NOBMI2-NEXT: shrq %cl, %rdi -; X64-NOBMI2-NEXT: shlq %cl, %rdi -; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shrq %cl, %rax +; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext: @@ -1608,12 +1612,12 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movq %rdi, %rax ; X64-NOBMI2-NEXT: movl $64, %ecx ; X64-NOBMI2-NEXT: subl %esi, %ecx -; X64-NOBMI2-NEXT: shrq %cl, %rdi +; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shlq %cl, %rdi -; X64-NOBMI2-NEXT: movq %rdi, %rax +; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative: @@ -1672,9 +1676,10 @@ ; X64-NOBMI2-NEXT: pushq %rbp ; X64-NOBMI2-NEXT: pushq %rbx ; X64-NOBMI2-NEXT: pushq %rax +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %ebx ; X64-NOBMI2-NEXT: movl $-1, %ebp -; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %ebp ; X64-NOBMI2-NEXT: movl %ebp, %edi ; X64-NOBMI2-NEXT: callq use32 @@ -1772,9 +1777,10 @@ ; X64-NOBMI2-NEXT: pushq %r14 ; X64-NOBMI2-NEXT: pushq %rbx ; X64-NOBMI2-NEXT: pushq %rax +; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %r14 ; X64-NOBMI2-NEXT: movq $-1, %rbx -; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rbx ; X64-NOBMI2-NEXT: movq %rbx, %rdi ; X64-NOBMI2-NEXT: callq use64 Index: test/CodeGen/X86/cmov-into-branch.ll =================================================================== --- test/CodeGen/X86/cmov-into-branch.ll +++ test/CodeGen/X86/cmov-into-branch.ll @@ -5,9 +5,9 @@ define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: ucomisd (%rdi), %xmm0 -; CHECK-NEXT: cmovbel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: ucomisd (%rdi), %xmm0 +; CHECK-NEXT: cmovbel %edx, %eax ; CHECK-NEXT: retq %load = load double, double* %b, align 8 %cmp = fcmp olt double %load, %a @@ -19,9 +19,9 @@ define i32 @test2(double %a, double %b, i32 %x, i32 %y) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: cmovbel %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: retq %cmp = fcmp ogt double %a, %b %cond = select i1 %cmp, i32 %x, i32 %y @@ -48,10 +48,10 @@ define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %edi, (%rsi) -; CHECK-NEXT: cmoval %edi, %ecx -; CHECK-NEXT: cmovael %edx, %ecx ; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: cmpl %edi, (%rsi) +; CHECK-NEXT: cmoval %edi, %eax +; CHECK-NEXT: cmovael %edx, %eax ; CHECK-NEXT: retq %load = load i32, i32* %b, align 4 %cmp = icmp ult i32 %load, %a @@ -83,9 +83,9 @@ define i32 @weighted_select1(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select1: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0 @@ -96,12 +96,12 @@ define i32 @weighted_select2(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select2: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: jne .LBB6_2 ; CHECK-NEXT: # %bb.1: # %select.false -; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: .LBB6_2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !1 @@ -115,14 +115,13 @@ define i32 @weighted_select3(i32 %a, i32 %b) { ; CHECK-LABEL: weighted_select3: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: je .LBB7_1 ; CHECK-NEXT: # %bb.2: # %select.end -; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB7_1: # %select.false -; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2 @@ -133,9 +132,9 @@ define i32 @unweighted_select(i32 %a, i32 %b) { ; CHECK-LABEL: unweighted_select: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %sel = select i1 %cmp, i32 %a, i32 %b, !prof !3 Index: test/CodeGen/X86/cmov.ll =================================================================== --- test/CodeGen/X86/cmov.ll +++ test/CodeGen/X86/cmov.ll @@ -194,11 +194,14 @@ ; CHECK-LABEL: test7: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB6_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edx, %esi -; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: jne .LBB6_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB6_1: ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %d = select i1 %c, i8 %a, i8 %b ret i8 %d Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -9,10 +9,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i32: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovnel %esi, %edi -; CMOV-NEXT: cmovpl %esi, %edi ; CMOV-NEXT: movl %edi, %eax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovnel %esi, %eax +; CMOV-NEXT: cmovpl %esi, %eax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) @@ -36,10 +36,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rsi, %rdi -; CMOV-NEXT: cmovpq %rsi, %rdi ; CMOV-NEXT: movq %rdi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rsi, %rax +; CMOV-NEXT: cmovpq %rsi, %rax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) @@ -64,10 +64,10 @@ ; CHECK-LABEL: test_select_fcmp_une_i64: -; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: cmovneq %rdi, %rsi -; CMOV-NEXT: cmovpq %rdi, %rsi ; CMOV-NEXT: movq %rsi, %rax +; CMOV-NEXT: ucomiss %xmm1, %xmm0 +; CMOV-NEXT: cmovneq %rdi, %rax +; CMOV-NEXT: cmovpq %rdi, %rax ; CMOV-NEXT: retq ; NOCMOV-NEXT: flds 8(%esp) Index: test/CodeGen/X86/cmp.ll =================================================================== --- test/CodeGen/X86/cmp.ll +++ test/CodeGen/X86/cmp.ll @@ -271,9 +271,9 @@ define i32 @test13(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] -; CHECK-NEXT: cmovnel %edx, %esi # encoding: [0x0f,0x45,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08] +; CHECK-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %mask, 8 @@ -286,9 +286,9 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] -; CHECK-NEXT: cmovnsl %edx, %esi # encoding: [0x0f,0x49,0xf2] ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] +; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] +; CHECK-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2] ; CHECK-NEXT: retq # encoding: [0xc3] entry: %s = lshr i32 %mask, 7 Index: test/CodeGen/X86/cmpxchg-clobber-flags.ll =================================================================== --- test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -151,6 +151,7 @@ ; ; 64-ALL-LABEL: test_control_flow: ; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: movl %esi, %eax ; 64-ALL-NEXT: cmpl %edx, %esi ; 64-ALL-NEXT: jle .LBB1_5 ; 64-ALL-NEXT: .p2align 4, 0x90 @@ -171,9 +172,8 @@ ; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi) ; 64-ALL-NEXT: jne .LBB1_1 ; 64-ALL-NEXT: # %bb.4: -; 64-ALL-NEXT: xorl %esi, %esi +; 64-ALL-NEXT: xorl %eax, %eax ; 64-ALL-NEXT: .LBB1_5: # %cond.end -; 64-ALL-NEXT: movl %esi, %eax ; 64-ALL-NEXT: retq entry: %cmp = icmp sgt i32 %i, %j Index: test/CodeGen/X86/cmpxchg-i128-i1.ll =================================================================== --- test/CodeGen/X86/cmpxchg-i128-i1.ll +++ test/CodeGen/X86/cmpxchg-i128-i1.ll @@ -7,10 +7,9 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 +; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: sete %al ; CHECK-NEXT: popq %rbx @@ -27,10 +26,9 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 +; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne .LBB1_2 ; CHECK-NEXT: # %bb.1: # %true @@ -64,14 +62,13 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 -; CHECK-NEXT: movq %rdx, %r10 +; CHECK-NEXT: movq %rcx, %rbx +; CHECK-NEXT: movq %rdx, %r9 ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: cmpq %rsi, %rax -; CHECK-NEXT: sbbq %r10, %rdx +; CHECK-NEXT: sbbq %r9, %rdx ; CHECK-NEXT: setge %al ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -88,15 +85,14 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rcx, %r9 -; CHECK-NEXT: xorl %r10d, %r10d +; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) -; CHECK-NEXT: sete %r10b +; CHECK-NEXT: sete %sil ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movq %r10, %rax +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq Index: test/CodeGen/X86/combine-add.ll =================================================================== --- test/CodeGen/X86/combine-add.ll +++ test/CodeGen/X86/combine-add.ll @@ -103,8 +103,8 @@ define <4 x i32> @combine_vec_add_sub_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add0: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add0: @@ -121,8 +121,8 @@ define <4 x i32> @combine_vec_add_sub_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add1: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add1: @@ -139,8 +139,8 @@ define <4 x i32> @combine_vec_add_sub_add2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add2: ; SSE: # %bb.0: -; SSE-NEXT: paddd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: paddd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add2: @@ -157,8 +157,8 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_add3: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_add3: @@ -203,9 +203,9 @@ ; ; AVX-LABEL: combine_vec_add_uniquebits: ; AVX: # %bb.0: -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680] ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855] ; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq Index: test/CodeGen/X86/combine-sdiv.ll =================================================================== --- test/CodeGen/X86/combine-sdiv.ll +++ test/CodeGen/X86/combine-sdiv.ll @@ -28,8 +28,8 @@ define i32 @combine_sdiv_by_negone(i32 %x) { ; CHECK-LABEL: combine_sdiv_by_negone: ; CHECK: # %bb.0: -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %1 = sdiv i32 %x, -1 ret i32 %1 Index: test/CodeGen/X86/combine-udiv.ll =================================================================== --- test/CodeGen/X86/combine-udiv.ll +++ test/CodeGen/X86/combine-udiv.ll @@ -55,8 +55,8 @@ define i32 @combine_udiv_by_minsigned(i32 %x) { ; CHECK-LABEL: combine_udiv_by_minsigned: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: retq %1 = udiv i32 %x, -2147483648 ret i32 %1 @@ -80,9 +80,9 @@ define i32 @combine_udiv_dupe(i32 %x) { ; CHECK-LABEL: combine_udiv_dupe: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: divl %edi +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %eax ; CHECK-NEXT: retq %1 = udiv i32 %x, %x ret i32 %1 Index: test/CodeGen/X86/combine-urem.ll =================================================================== --- test/CodeGen/X86/combine-urem.ll +++ test/CodeGen/X86/combine-urem.ll @@ -62,8 +62,8 @@ define i32 @combine_urem_by_minsigned(i32 %x) { ; CHECK-LABEL: combine_urem_by_minsigned: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: retq %1 = urem i32 %x, -2147483648 ret i32 %1 @@ -93,9 +93,9 @@ define i32 @combine_urem_dupe(i32 %x) { ; CHECK-LABEL: combine_urem_dupe: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: divl %edi +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %eax ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %1 = urem i32 %x, %x Index: test/CodeGen/X86/conditional-indecrement.ll =================================================================== --- test/CodeGen/X86/conditional-indecrement.ll +++ test/CodeGen/X86/conditional-indecrement.ll @@ -4,9 +4,9 @@ define i32 @test1(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -17,9 +17,9 @@ define i32 @test1_commute(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test1_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %cmp = icmp ne i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -30,9 +30,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -43,9 +43,9 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -56,9 +56,9 @@ define i32 @test4(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -69,9 +69,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 @@ -82,9 +82,9 @@ define i32 @test6(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -95,9 +95,9 @@ define i32 @test7(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %a, 0 %inc = zext i1 %cmp to i32 @@ -108,9 +108,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: adcl $-1, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %not.cmp = icmp ne i32 %a, 0 %inc = zext i1 %not.cmp to i32 Index: test/CodeGen/X86/dagcombine-select.ll =================================================================== --- test/CodeGen/X86/dagcombine-select.ll +++ test/CodeGen/X86/dagcombine-select.ll @@ -194,10 +194,11 @@ define i32 @shl_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: shl_constant_sel_constants: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: xorb $3, %dil -; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: xorb $3, %cl +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 2, i32 3 @@ -208,10 +209,11 @@ define i32 @lshr_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: lshr_constant_sel_constants: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: xorb $3, %dil -; CHECK-NEXT: movl $64, %eax ; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: xorb $3, %cl +; CHECK-NEXT: movl $64, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 2, i32 3 @@ -222,10 +224,11 @@ define i32 @ashr_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: ashr_constant_sel_constants: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: xorb $3, %dil -; CHECK-NEXT: movl $128, %eax ; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: xorb $3, %cl +; CHECK-NEXT: movl $128, %eax +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 2, i32 3 Index: test/CodeGen/X86/divide-by-constant.ll =================================================================== --- test/CodeGen/X86/divide-by-constant.ll +++ test/CodeGen/X86/divide-by-constant.ll @@ -94,8 +94,8 @@ ; X32: # %bb.0: ; X32-NEXT: movl $365384439, %eax # imm = 0x15C752F7 ; X32-NEXT: mull {{[0-9]+}}(%esp) -; X32-NEXT: shrl $27, %edx ; X32-NEXT: movl %edx, %eax +; X32-NEXT: shrl $27, %eax ; X32-NEXT: retl ; ; X64-LABEL: test5: @@ -216,9 +216,9 @@ ; ; X64-LABEL: testsize1: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $32 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -239,9 +239,9 @@ ; ; X64-LABEL: testsize2: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: retq @@ -259,8 +259,8 @@ ; ; X64-LABEL: testsize3: ; X64: # %bb.0: # %entry -; X64-NEXT: shrl $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $5, %eax ; X64-NEXT: retq entry: %div = udiv i32 %x, 32 @@ -279,10 +279,10 @@ ; ; X64-LABEL: testsize4: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pushq $33 ; X64-NEXT: popq %rcx ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: movl %edi, %eax ; X64-NEXT: divl %ecx ; X64-NEXT: retq entry: @@ -310,19 +310,18 @@ ; ; X64-LABEL: PR23590: ; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F +; X64-NEXT: movabsq $6120523590596543007, %rcx # imm = 0x54F077C718E7C21F ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rdx +; X64-NEXT: mulq %rcx ; X64-NEXT: shrq $12, %rdx ; X64-NEXT: imulq $12345, %rdx, %rax # imm = 0x3039 -; X64-NEXT: subq %rax, %rcx -; X64-NEXT: movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdx -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: shrq %rcx -; X64-NEXT: leaq (%rcx,%rdx), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movabsq $2635249153387078803, %rcx # imm = 0x2492492492492493 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: subq %rdx, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: leaq (%rdi,%rdx), %rax ; X64-NEXT: shrq $2, %rax ; X64-NEXT: retq entry: Index: test/CodeGen/X86/divrem.ll =================================================================== --- test/CodeGen/X86/divrem.ll +++ test/CodeGen/X86/divrem.ll @@ -101,6 +101,7 @@ ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: cwtd ; X64-NEXT: idivw %si ; X64-NEXT: movw %ax, (%r8) @@ -131,6 +132,7 @@ ; X64-LABEL: si8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %esi @@ -182,8 +184,8 @@ ; X64-LABEL: ui64: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divq %rsi ; X64-NEXT: movq %rax, (%r8) ; X64-NEXT: movq %rdx, (%rcx) @@ -212,8 +214,8 @@ ; X64-LABEL: ui32: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divl %esi ; X64-NEXT: movl %eax, (%r8) ; X64-NEXT: movl %edx, (%rcx) @@ -242,8 +244,9 @@ ; X64-LABEL: ui16: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: divw %si ; X64-NEXT: movw %ax, (%r8) ; X64-NEXT: movw %dx, (%rcx) Index: test/CodeGen/X86/divrem8_ext.ll =================================================================== --- test/CodeGen/X86/divrem8_ext.ll +++ test/CodeGen/X86/divrem8_ext.ll @@ -112,6 +112,7 @@ ; X64-LABEL: test_sdivrem_sext_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %ecx @@ -137,6 +138,7 @@ ; X64-LABEL: test_srem_sext_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax @@ -161,6 +163,7 @@ ; X64-LABEL: test_srem_noext_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax @@ -186,6 +189,7 @@ ; X64-LABEL: test_srem_sext64_ah: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: cbtw ; X64-NEXT: idivb %sil ; X64-NEXT: movsbl %ah, %eax Index: test/CodeGen/X86/extract-lowbits.ll =================================================================== --- test/CodeGen/X86/extract-lowbits.ll +++ test/CodeGen/X86/extract-lowbits.ll @@ -45,8 +45,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_a0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: decl %eax ; X64-NOBMI-NEXT: andl %edi, %eax @@ -80,8 +81,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_a1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: decl %eax ; X64-NOBMI-NEXT: andl %edi, %eax @@ -118,8 +120,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_a2_load: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: decl %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax @@ -156,8 +159,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: decl %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax @@ -193,8 +197,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_a4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: decl %eax ; X64-NOBMI-NEXT: andl %edi, %eax @@ -253,8 +258,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_a0: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movl $1, %eax -; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: decq %rax ; X64-NOBMI-NEXT: andq %rdi, %rax @@ -311,8 +317,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_a1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: decq %rax ; X64-NOBMI-NEXT: andq %rdi, %rax @@ -377,8 +384,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_a2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movl $1, %eax -; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: decq %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax @@ -442,8 +450,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: decq %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax @@ -503,8 +512,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_a4_commutative: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movl $1, %eax -; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: decq %rax ; X64-NOBMI-NEXT: andq %rdi, %rax @@ -542,8 +552,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_b0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $-1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: notl %eax ; X64-NOBMI-NEXT: andl %edi, %eax @@ -577,8 +588,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_b1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $-1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: notl %eax ; X64-NOBMI-NEXT: andl %edi, %eax @@ -615,8 +627,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_b2_load: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $-1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: notl %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax @@ -653,8 +666,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $-1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: notl %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax @@ -690,8 +704,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_b4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $-1, %eax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: notl %eax ; X64-NOBMI-NEXT: andl %edi, %eax @@ -749,8 +764,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_b0: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: notq %rax ; X64-NOBMI-NEXT: andq %rdi, %rax @@ -806,8 +822,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_b1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movq $-1, %rax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movq $-1, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: notq %rax ; X64-NOBMI-NEXT: andq %rdi, %rax @@ -869,8 +886,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_b2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: notq %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax @@ -931,8 +949,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movq $-1, %rax ; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movq $-1, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: notq %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax @@ -991,8 +1010,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_b4_commutative: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: notq %rax ; X64-NOBMI-NEXT: andq %rdi, %rax @@ -1031,12 +1051,12 @@ ; ; X64-NOBMI-LABEL: bzhi32_c0: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: movl $32, %ecx ; X64-NOBMI-NEXT: subl %esi, %ecx -; X64-NOBMI-NEXT: shll %cl, %edi +; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI-NEXT: shrl %cl, %edi -; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi32_c0: @@ -1067,11 +1087,11 @@ ; ; X64-NOBMI-LABEL: bzhi32_c1_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: movb $32, %cl ; X64-NOBMI-NEXT: subb %sil, %cl -; X64-NOBMI-NEXT: shll %cl, %edi -; X64-NOBMI-NEXT: shrl %cl, %edi -; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext: @@ -1183,12 +1203,12 @@ ; ; X64-NOBMI-LABEL: bzhi32_c4_commutative: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: movl $32, %ecx ; X64-NOBMI-NEXT: subl %esi, %ecx -; X64-NOBMI-NEXT: shll %cl, %edi +; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI-NEXT: shrl %cl, %edi -; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative: @@ -1241,12 +1261,12 @@ ; ; X64-NOBMI-LABEL: bzhi64_c0: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: movl $64, %ecx ; X64-NOBMI-NEXT: subl %esi, %ecx -; X64-NOBMI-NEXT: shlq %cl, %rdi +; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI-NEXT: shrq %cl, %rdi -; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi64_c0: @@ -1297,11 +1317,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_c1_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: movb $64, %cl ; X64-NOBMI-NEXT: subb %sil, %cl -; X64-NOBMI-NEXT: shlq %cl, %rdi -; X64-NOBMI-NEXT: shrq %cl, %rdi -; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext: @@ -1481,12 +1501,12 @@ ; ; X64-NOBMI-LABEL: bzhi64_c4_commutative: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: movl $64, %ecx ; X64-NOBMI-NEXT: subl %esi, %ecx -; X64-NOBMI-NEXT: shlq %cl, %rdi +; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI-NEXT: shrq %cl, %rdi -; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative: @@ -1522,12 +1542,12 @@ ; ; X64-NOBMI-LABEL: bzhi32_d0: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: movl $32, %ecx ; X64-NOBMI-NEXT: subl %esi, %ecx -; X64-NOBMI-NEXT: shll %cl, %edi +; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI-NEXT: shrl %cl, %edi -; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi32_d0: @@ -1558,11 +1578,11 @@ ; ; X64-NOBMI-LABEL: bzhi32_d1_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: movb $32, %cl ; X64-NOBMI-NEXT: subb %sil, %cl -; X64-NOBMI-NEXT: shll %cl, %edi -; X64-NOBMI-NEXT: shrl %cl, %edi -; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext: @@ -1731,12 +1751,12 @@ ; ; X64-NOBMI-LABEL: bzhi64_d0: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: movl $64, %ecx ; X64-NOBMI-NEXT: subl %esi, %ecx -; X64-NOBMI-NEXT: shlq %cl, %rdi +; X64-NOBMI-NEXT: shlq %cl, %rax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI-NEXT: shrq %cl, %rdi -; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi64_d0: @@ -1823,11 +1843,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_d1_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: movb $64, %cl ; X64-NOBMI-NEXT: subb %sil, %cl -; X64-NOBMI-NEXT: shlq %cl, %rdi -; X64-NOBMI-NEXT: shrq %cl, %rdi -; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; ; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext: @@ -2048,8 +2068,8 @@ ; ; X64-LABEL: bzhi32_constant_mask32: ; X64: # %bb.0: -; X64-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; X64-NEXT: retq %masked = and i32 %val, 2147483647 ret i32 %masked @@ -2082,8 +2102,8 @@ ; ; X64-LABEL: bzhi32_constant_mask16: ; X64: # %bb.0: -; X64-NEXT: andl $32767, %edi # imm = 0x7FFF ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: retq %masked = and i32 %val, 32767 ret i32 %masked @@ -2116,8 +2136,8 @@ ; ; X64-LABEL: bzhi32_constant_mask8: ; X64: # %bb.0: -; X64-NEXT: andl $127, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $127, %eax ; X64-NEXT: retq %masked = and i32 %val, 127 ret i32 %masked @@ -2211,8 +2231,8 @@ ; ; X64-LABEL: bzhi64_constant_mask32: ; X64: # %bb.0: -; X64-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; X64-NEXT: retq %masked = and i64 %val, 2147483647 ret i64 %masked @@ -2247,8 +2267,8 @@ ; ; X64-LABEL: bzhi64_constant_mask16: ; X64: # %bb.0: -; X64-NEXT: andl $32767, %edi # imm = 0x7FFF ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: retq %masked = and i64 %val, 32767 ret i64 %masked @@ -2283,8 +2303,8 @@ ; ; X64-LABEL: bzhi64_constant_mask8: ; X64: # %bb.0: -; X64-NEXT: andl $127, %edi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andl $127, %eax ; X64-NEXT: retq %masked = and i64 %val, 127 ret i64 %masked Index: test/CodeGen/X86/fast-isel-fold-mem.ll =================================================================== --- test/CodeGen/X86/fast-isel-fold-mem.ll +++ test/CodeGen/X86/fast-isel-fold-mem.ll @@ -5,8 +5,8 @@ define i64 @fold_load(i64* %a, i64 %b) { ; CHECK-LABEL: fold_load: ; CHECK: ## %bb.0: -; CHECK-NEXT: addq (%rdi), %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: addq (%rdi), %rax ; CHECK-NEXT: retq %1 = load i64, i64* %a, align 8 %2 = add i64 %1, %b Index: test/CodeGen/X86/fast-isel-select-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov.ll +++ test/CodeGen/X86/fast-isel-select-cmov.ll @@ -31,9 +31,9 @@ define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) { ; CHECK-LABEL: select_cmov_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmovel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmovel %edx, %eax ; CHECK-NEXT: retq %1 = select i1 %cond, i32 %a, i32 %b ret i32 %1 @@ -42,9 +42,9 @@ define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) { ; CHECK-LABEL: select_cmp_cmov_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: cmovbl %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: retq %1 = icmp ult i32 %a, %b %2 = select i1 %1, i32 %a, i32 %b @@ -54,9 +54,9 @@ define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) { ; CHECK-LABEL: select_cmov_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmoveq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmoveq %rdx, %rax ; CHECK-NEXT: retq %1 = select i1 %cond, i64 %a, i64 %b ret i64 %1 @@ -65,9 +65,9 @@ define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) { ; CHECK-LABEL: select_cmp_cmov_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rdi, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbq %rdi, %rax ; CHECK-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %a, i64 %b Index: test/CodeGen/X86/fast-isel-select-cmov2.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-cmov2.ll +++ test/CodeGen/X86/fast-isel-select-cmov2.ll @@ -19,30 +19,30 @@ define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) { ; SDAG-LABEL: select_fcmp_oeq_cmov: ; SDAG: ## %bb.0: -; SDAG-NEXT: ucomisd %xmm1, %xmm0 -; SDAG-NEXT: cmovneq %rsi, %rdi -; SDAG-NEXT: cmovpq %rsi, %rdi ; SDAG-NEXT: movq %rdi, %rax +; SDAG-NEXT: ucomisd %xmm1, %xmm0 +; SDAG-NEXT: cmovneq %rsi, %rax +; SDAG-NEXT: cmovpq %rsi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: select_fcmp_oeq_cmov: ; FAST: ## %bb.0: -; FAST-NEXT: ucomisd %xmm1, %xmm0 -; FAST-NEXT: setnp %al -; FAST-NEXT: sete %cl -; FAST-NEXT: testb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: ucomisd %xmm1, %xmm0 +; FAST-NEXT: setnp %cl +; FAST-NEXT: sete %dl +; FAST-NEXT: testb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax ; FAST-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_oeq_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: setnp %al -; FAST_AVX-NEXT: sete %cl -; FAST_AVX-NEXT: testb %al, %cl -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: setnp %cl +; FAST_AVX-NEXT: sete %dl +; FAST_AVX-NEXT: testb %cl, %dl +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp oeq double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -52,16 +52,16 @@ define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ogt_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovbeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovbeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ogt_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovbeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovbeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ogt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -71,16 +71,16 @@ define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_oge_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovbq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovbq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_oge_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovbq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovbq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp oge double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -90,16 +90,16 @@ define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_olt_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovbeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovbeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_olt_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovbeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovbeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp olt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -109,16 +109,16 @@ define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ole_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovbq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovbq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ole_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovbq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovbq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ole double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -128,16 +128,16 @@ define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_one_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmoveq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmoveq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_one_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp one double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -147,16 +147,16 @@ define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ord_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovpq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovpq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ord_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovpq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovpq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ord double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -166,16 +166,16 @@ define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_uno_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovnpq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovnpq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_uno_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovnpq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovnpq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp uno double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -185,16 +185,16 @@ define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ueq_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovneq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovneq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ueq_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovneq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovneq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ueq double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -204,16 +204,16 @@ define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ugt_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovaeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovaeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ugt_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovaeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovaeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ugt double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -223,16 +223,16 @@ define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_uge_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm0, %xmm1 -; NOAVX-NEXT: cmovaq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm0, %xmm1 +; NOAVX-NEXT: cmovaq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_uge_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 -; FAST_AVX-NEXT: cmovaq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm0, %xmm1 +; FAST_AVX-NEXT: cmovaq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp uge double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -242,16 +242,16 @@ define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ult_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovaeq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovaeq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ult_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovaeq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovaeq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ult double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -261,16 +261,16 @@ define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) { ; NOAVX-LABEL: select_fcmp_ule_cmov: ; NOAVX: ## %bb.0: -; NOAVX-NEXT: ucomisd %xmm1, %xmm0 -; NOAVX-NEXT: cmovaq %rsi, %rdi ; NOAVX-NEXT: movq %rdi, %rax +; NOAVX-NEXT: ucomisd %xmm1, %xmm0 +; NOAVX-NEXT: cmovaq %rsi, %rax ; NOAVX-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_ule_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: cmovaq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: cmovaq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp ule double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -280,30 +280,30 @@ define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) { ; SDAG-LABEL: select_fcmp_une_cmov: ; SDAG: ## %bb.0: -; SDAG-NEXT: ucomisd %xmm1, %xmm0 -; SDAG-NEXT: cmovneq %rdi, %rsi -; SDAG-NEXT: cmovpq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: ucomisd %xmm1, %xmm0 +; SDAG-NEXT: cmovneq %rdi, %rax +; SDAG-NEXT: cmovpq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: select_fcmp_une_cmov: ; FAST: ## %bb.0: -; FAST-NEXT: ucomisd %xmm1, %xmm0 -; FAST-NEXT: setp %al -; FAST-NEXT: setne %cl -; FAST-NEXT: orb %al, %cl -; FAST-NEXT: cmoveq %rsi, %rdi ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: ucomisd %xmm1, %xmm0 +; FAST-NEXT: setp %cl +; FAST-NEXT: setne %dl +; FAST-NEXT: orb %cl, %dl +; FAST-NEXT: cmoveq %rsi, %rax ; FAST-NEXT: retq ; ; FAST_AVX-LABEL: select_fcmp_une_cmov: ; FAST_AVX: ## %bb.0: -; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 -; FAST_AVX-NEXT: setp %al -; FAST_AVX-NEXT: setne %cl -; FAST_AVX-NEXT: orb %al, %cl -; FAST_AVX-NEXT: cmoveq %rsi, %rdi ; FAST_AVX-NEXT: movq %rdi, %rax +; FAST_AVX-NEXT: vucomisd %xmm1, %xmm0 +; FAST_AVX-NEXT: setp %cl +; FAST_AVX-NEXT: setne %dl +; FAST_AVX-NEXT: orb %cl, %dl +; FAST_AVX-NEXT: cmoveq %rsi, %rax ; FAST_AVX-NEXT: retq %1 = fcmp une double %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -323,9 +323,9 @@ define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_eq_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovneq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovneq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp eq i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -335,9 +335,9 @@ define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ne_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmoveq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ne i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -347,9 +347,9 @@ define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ugt_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ugt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -360,9 +360,9 @@ define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_uge_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovbq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovbq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp uge i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -372,9 +372,9 @@ define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ult_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ult i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -384,9 +384,9 @@ define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_ule_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovaq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovaq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp ule i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -396,9 +396,9 @@ define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sgt_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovleq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovleq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sgt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -408,9 +408,9 @@ define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sge_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovlq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sge i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -420,9 +420,9 @@ define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_slt_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgeq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgeq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp slt i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d @@ -432,9 +432,9 @@ define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: select_icmp_sle_cmov: ; CHECK: ## %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: cmovgq %rcx, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmovgq %rcx, %rax ; CHECK-NEXT: retq %1 = icmp sle i64 %a, %b %2 = select i1 %1, i64 %c, i64 %d Index: test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll =================================================================== --- test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll +++ test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll @@ -281,11 +281,14 @@ ; CHECK-LABEL: select_icmp_sle_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: jle LBB12_2 -; CHECK-NEXT: ## %bb.1: -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: LBB12_2: +; CHECK-NEXT: jle LBB12_1 +; CHECK-NEXT: ## %bb.2: +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB12_1: ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %1 = icmp sle i64 %a, %b %2 = select i1 %1, i8 %c, i8 %d Index: test/CodeGen/X86/fast-isel-sext-zext.ll =================================================================== --- test/CodeGen/X86/fast-isel-sext-zext.ll +++ test/CodeGen/X86/fast-isel-sext-zext.ll @@ -12,9 +12,10 @@ ; ; X64-LABEL: test1: ; X64: ## %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: ## kill: def $al killed $al killed $eax ; X64-NEXT: retq %z = trunc i8 %x to i1 %u = sext i1 %z to i8 @@ -92,8 +93,9 @@ ; ; X64-LABEL: test5: ; X64: ## %bb.0: -; X64-NEXT: andb $1, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: ## kill: def $al killed $al killed $eax ; X64-NEXT: retq %z = trunc i8 %x to i1 %u = zext i1 %z to i8 Index: test/CodeGen/X86/fast-isel-shift.ll =================================================================== --- test/CodeGen/X86/fast-isel-shift.ll +++ test/CodeGen/X86/fast-isel-shift.ll @@ -5,8 +5,10 @@ ; CHECK-LABEL: shl_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shlb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shlb %cl, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = shl i8 %a, %b ret i8 %c @@ -16,9 +18,11 @@ ; CHECK-LABEL: shl_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cx -; CHECK-NEXT: shlw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: ## kill: def $cl killed $cx +; CHECK-NEXT: shlw %cl, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = shl i16 %a, %b ret i16 %c @@ -28,9 +32,9 @@ ; CHECK-LABEL: shl_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $ecx -; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $ecx +; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq %c = shl i32 %a, %b ret i32 %c @@ -40,9 +44,9 @@ ; CHECK-LABEL: shl_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: def $cl killed $rcx -; CHECK-NEXT: shlq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: def $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rax ; CHECK-NEXT: retq %c = shl i64 %a, %b ret i64 %c @@ -52,8 +56,10 @@ ; CHECK-LABEL: lshr_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shrb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shrb %cl, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = lshr i8 %a, %b ret i8 %c @@ -63,9 +69,11 @@ ; CHECK-LABEL: lshr_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cx -; CHECK-NEXT: shrw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: ## kill: def $cl killed $cx +; CHECK-NEXT: shrw %cl, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = lshr i16 %a, %b ret i16 %c @@ -75,9 +83,9 @@ ; CHECK-LABEL: lshr_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $ecx -; CHECK-NEXT: shrl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $ecx +; CHECK-NEXT: shrl %cl, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, %b ret i32 %c @@ -87,9 +95,9 @@ ; CHECK-LABEL: lshr_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: def $cl killed $rcx -; CHECK-NEXT: shrq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: def $cl killed $rcx +; CHECK-NEXT: shrq %cl, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, %b ret i64 %c @@ -99,8 +107,10 @@ ; CHECK-LABEL: ashr_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: sarb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: sarb %cl, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = ashr i8 %a, %b ret i8 %c @@ -110,9 +120,11 @@ ; CHECK-LABEL: ashr_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cx -; CHECK-NEXT: sarw %cl, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: ## kill: def $cl killed $cx +; CHECK-NEXT: sarw %cl, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = ashr i16 %a, %b ret i16 %c @@ -122,9 +134,9 @@ ; CHECK-LABEL: ashr_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: ## kill: def $cl killed $ecx -; CHECK-NEXT: sarl %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: ## kill: def $cl killed $ecx +; CHECK-NEXT: sarl %cl, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, %b ret i32 %c @@ -134,9 +146,9 @@ ; CHECK-LABEL: ashr_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: ## kill: def $cl killed $rcx -; CHECK-NEXT: sarq %cl, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: ## kill: def $cl killed $rcx +; CHECK-NEXT: sarq %cl, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, %b ret i64 %c @@ -145,8 +157,9 @@ define i8 @shl_imm1_i8(i8 %a) { ; CHECK-LABEL: shl_imm1_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = shl i8 %a, 1 ret i8 %c @@ -185,8 +198,9 @@ define i8 @lshr_imm1_i8(i8 %a) { ; CHECK-LABEL: lshr_imm1_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = lshr i8 %a, 1 ret i8 %c @@ -195,8 +209,9 @@ define i16 @lshr_imm1_i16(i16 %a) { ; CHECK-LABEL: lshr_imm1_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrw $1, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrw $1, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = lshr i16 %a, 1 ret i16 %c @@ -205,8 +220,8 @@ define i32 @lshr_imm1_i32(i32 %a) { ; CHECK-LABEL: lshr_imm1_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $1, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, 1 ret i32 %c @@ -215,8 +230,8 @@ define i64 @lshr_imm1_i64(i64 %a) { ; CHECK-LABEL: lshr_imm1_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrq $1, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $1, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, 1 ret i64 %c @@ -225,8 +240,9 @@ define i8 @ashr_imm1_i8(i8 %a) { ; CHECK-LABEL: ashr_imm1_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = ashr i8 %a, 1 ret i8 %c @@ -235,8 +251,9 @@ define i16 @ashr_imm1_i16(i16 %a) { ; CHECK-LABEL: ashr_imm1_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarw $1, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarw $1, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = ashr i16 %a, 1 ret i16 %c @@ -245,8 +262,8 @@ define i32 @ashr_imm1_i32(i32 %a) { ; CHECK-LABEL: ashr_imm1_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $1, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, 1 ret i32 %c @@ -255,8 +272,8 @@ define i64 @ashr_imm1_i64(i64 %a) { ; CHECK-LABEL: ashr_imm1_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarq $1, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: sarq $1, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, 1 ret i64 %c @@ -265,8 +282,9 @@ define i8 @shl_imm4_i8(i8 %a) { ; CHECK-LABEL: shl_imm4_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = shl i8 %a, 4 ret i8 %c @@ -275,8 +293,9 @@ define i16 @shl_imm4_i16(i16 %a) { ; CHECK-LABEL: shl_imm4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlw $4, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = shl i16 %a, 4 ret i16 %c @@ -285,8 +304,8 @@ define i32 @shl_imm4_i32(i32 %a) { ; CHECK-LABEL: shl_imm4_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: shll $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $4, %eax ; CHECK-NEXT: retq %c = shl i32 %a, 4 ret i32 %c @@ -295,8 +314,8 @@ define i64 @shl_imm4_i64(i64 %a) { ; CHECK-LABEL: shl_imm4_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: shlq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq $4, %rax ; CHECK-NEXT: retq %c = shl i64 %a, 4 ret i64 %c @@ -305,8 +324,9 @@ define i8 @lshr_imm4_i8(i8 %a) { ; CHECK-LABEL: lshr_imm4_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrb $4, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = lshr i8 %a, 4 ret i8 %c @@ -315,8 +335,9 @@ define i16 @lshr_imm4_i16(i16 %a) { ; CHECK-LABEL: lshr_imm4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrw $4, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = lshr i16 %a, 4 ret i16 %c @@ -325,8 +346,8 @@ define i32 @lshr_imm4_i32(i32 %a) { ; CHECK-LABEL: lshr_imm4_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: retq %c = lshr i32 %a, 4 ret i32 %c @@ -335,8 +356,8 @@ define i64 @lshr_imm4_i64(i64 %a) { ; CHECK-LABEL: lshr_imm4_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: shrq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $4, %rax ; CHECK-NEXT: retq %c = lshr i64 %a, 4 ret i64 %c @@ -345,8 +366,9 @@ define i8 @ashr_imm4_i8(i8 %a) { ; CHECK-LABEL: ashr_imm4_i8: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarb $4, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarb $4, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = ashr i8 %a, 4 ret i8 %c @@ -355,8 +377,9 @@ define i16 @ashr_imm4_i16(i16 %a) { ; CHECK-LABEL: ashr_imm4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarw $4, %di ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarw $4, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = ashr i16 %a, 4 ret i16 %c @@ -365,8 +388,8 @@ define i32 @ashr_imm4_i32(i32 %a) { ; CHECK-LABEL: ashr_imm4_i32: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarl $4, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $4, %eax ; CHECK-NEXT: retq %c = ashr i32 %a, 4 ret i32 %c @@ -375,8 +398,8 @@ define i64 @ashr_imm4_i64(i64 %a) { ; CHECK-LABEL: ashr_imm4_i64: ; CHECK: ## %bb.0: -; CHECK-NEXT: sarq $4, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: sarq $4, %rax ; CHECK-NEXT: retq %c = ashr i64 %a, 4 ret i64 %c @@ -386,9 +409,10 @@ define i8 @PR36731(i8 %a) { ; CHECK-LABEL: PR36731: ; CHECK: ## %bb.0: -; CHECK-NEXT: movb $255, %cl -; CHECK-NEXT: shlb %cl, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movb $255, %cl +; CHECK-NEXT: shlb %cl, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %b = shl i8 %a, -1 ret i8 %b Index: test/CodeGen/X86/fast-isel-store.ll =================================================================== --- test/CodeGen/X86/fast-isel-store.ll +++ test/CodeGen/X86/fast-isel-store.ll @@ -11,8 +11,8 @@ define i32 @test_store_32(i32* nocapture %addr, i32 %value) { ; ALL32-LABEL: test_store_32: ; ALL32: # %bb.0: # %entry -; ALL32-NEXT: movl %esi, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movl %esi, (%rdi) ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_32: @@ -29,8 +29,9 @@ define i16 @test_store_16(i16* nocapture %addr, i16 %value) { ; ALL32-LABEL: test_store_16: ; ALL32: # %bb.0: # %entry -; ALL32-NEXT: movw %si, (%rdi) ; ALL32-NEXT: movl %esi, %eax +; ALL32-NEXT: movw %ax, (%rdi) +; ALL32-NEXT: # kill: def $ax killed $ax killed $eax ; ALL32-NEXT: retq ; ; ALL64-LABEL: test_store_16: Index: test/CodeGen/X86/fixup-bw-copy.ll =================================================================== --- test/CodeGen/X86/fixup-bw-copy.ll +++ test/CodeGen/X86/fixup-bw-copy.ll @@ -7,15 +7,11 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" define i8 @test_movb(i8 %a0) { -; BWON64-LABEL: test_movb: -; BWON64: # %bb.0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movb: -; BWOFF64: # %bb.0: -; BWOFF64-NEXT: movb %dil, %al -; BWOFF64-NEXT: retq +; X64-LABEL: test_movb: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq ; ; X32-LABEL: test_movb: ; X32: # %bb.0: @@ -25,15 +21,11 @@ } define i16 @test_movw(i16 %a0) { -; BWON64-LABEL: test_movw: -; BWON64: # %bb.0: -; BWON64-NEXT: movl %edi, %eax -; BWON64-NEXT: retq -; -; BWOFF64-LABEL: test_movw: -; BWOFF64: # %bb.0: -; BWOFF64-NEXT: movw %di, %ax -; BWOFF64-NEXT: retq +; X64-LABEL: test_movw: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq ; ; BWON32-LABEL: test_movw: ; BWON32: # %bb.0: Index: test/CodeGen/X86/fma.ll =================================================================== --- test/CodeGen/X86/fma.ll +++ test/CodeGen/X86/fma.ll @@ -1351,14 +1351,13 @@ ; FMACALL64-NEXT: ## xmm2 = xmm2[1,1,2,3] ; FMACALL64-NEXT: callq _fmaf ## encoding: [0xe8,A,A,A,A] ; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel -; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload -; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x40] -; FMACALL64-NEXT: unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8] -; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; FMACALL64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload -; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x20] -; FMACALL64-NEXT: ## xmm1 = xmm1[0],mem[0] -; FMACALL64-NEXT: movaps %xmm1, %xmm3 ## encoding: [0x0f,0x28,0xd9] +; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload +; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x5c,0x24,0x40] +; FMACALL64-NEXT: unpcklps %xmm0, %xmm3 ## encoding: [0x0f,0x14,0xd8] +; FMACALL64-NEXT: ## xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; FMACALL64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Folded Reload +; FMACALL64-NEXT: ## encoding: [0x66,0x0f,0x14,0x5c,0x24,0x20] +; FMACALL64-NEXT: ## xmm3 = xmm3[0],mem[0] ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x44,0x24,0x60] ; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload Index: test/CodeGen/X86/fold-vector-sext-crash2.ll =================================================================== --- test/CodeGen/X86/fold-vector-sext-crash2.ll +++ test/CodeGen/X86/fold-vector-sext-crash2.ll @@ -28,6 +28,7 @@ ; ; X64-LABEL: test_sext1: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, 16(%rdi) ; X64-NEXT: movaps %xmm0, (%rdi) @@ -35,7 +36,6 @@ ; X64-NEXT: movq $-1, 48(%rdi) ; X64-NEXT: movq $-1, 40(%rdi) ; X64-NEXT: movq $-99, 32(%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %Se = sext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -66,6 +66,7 @@ ; ; X64-LABEL: test_sext2: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, 16(%rdi) ; X64-NEXT: movaps %xmm0, (%rdi) @@ -73,7 +74,6 @@ ; X64-NEXT: movq $-1, 48(%rdi) ; X64-NEXT: movq $-1, 40(%rdi) ; X64-NEXT: movq $-1999, 32(%rdi) # imm = 0xF831 -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %Se = sext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -104,13 +104,13 @@ ; ; X64-LABEL: test_zext1: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, 48(%rdi) ; X64-NEXT: movaps %xmm0, 16(%rdi) ; X64-NEXT: movaps %xmm0, (%rdi) ; X64-NEXT: movq $0, 40(%rdi) ; X64-NEXT: movq $254, 32(%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %Se = zext <2 x i8> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> @@ -141,13 +141,13 @@ ; ; X64-LABEL: test_zext2: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, 48(%rdi) ; X64-NEXT: movaps %xmm0, 16(%rdi) ; X64-NEXT: movaps %xmm0, (%rdi) ; X64-NEXT: movq $-1, 40(%rdi) ; X64-NEXT: movq $-2, 32(%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %Se = zext <2 x i128> to <2 x i256> %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> Index: test/CodeGen/X86/funnel-shift-rot.ll =================================================================== --- test/CodeGen/X86/funnel-shift-rot.ll +++ test/CodeGen/X86/funnel-shift-rot.ll @@ -25,8 +25,9 @@ ; ; X64-AVX2-LABEL: rotl_i8_const_shift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: rolb $3, %dil ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: rolb $3, %al +; X64-AVX2-NEXT: # kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: retq %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) ret i8 %f @@ -44,8 +45,8 @@ ; ; X64-AVX2-LABEL: rotl_i64_const_shift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: rolq $3, %rdi ; X64-AVX2-NEXT: movq %rdi, %rax +; X64-AVX2-NEXT: rolq $3, %rax ; X64-AVX2-NEXT: retq %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3) ret i64 %f @@ -62,8 +63,10 @@ ; X64-AVX2-LABEL: rotl_i16: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %esi, %ecx -; X64-AVX2-NEXT: rolw %cl, %di ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: rolw %cl, %ax +; X64-AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: retq %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) ret i16 %f @@ -80,8 +83,9 @@ ; X64-AVX2-LABEL: rotl_i32: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %esi, %ecx -; X64-AVX2-NEXT: roll %cl, %edi ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: roll %cl, %eax ; X64-AVX2-NEXT: retq %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) ret i32 %f @@ -174,8 +178,9 @@ ; ; X64-AVX2-LABEL: rotr_i8_const_shift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: rorb $3, %dil ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: rorb $3, %al +; X64-AVX2-NEXT: # kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: retq %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3) ret i8 %f @@ -190,8 +195,8 @@ ; ; X64-AVX2-LABEL: rotr_i32_const_shift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: rorl $3, %edi ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: rorl $3, %eax ; X64-AVX2-NEXT: retq %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) ret i32 %f @@ -210,8 +215,10 @@ ; X64-AVX2-LABEL: rotr_i16: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %esi, %ecx -; X64-AVX2-NEXT: rorw %cl, %di ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: rorw %cl, %ax +; X64-AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: retq %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) ret i16 %f @@ -256,9 +263,10 @@ ; ; X64-AVX2-LABEL: rotr_i64: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: movl %esi, %ecx -; X64-AVX2-NEXT: rorq %cl, %rdi +; X64-AVX2-NEXT: movq %rsi, %rcx ; X64-AVX2-NEXT: movq %rdi, %rax +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-AVX2-NEXT: rorq %cl, %rax ; X64-AVX2-NEXT: retq %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) ret i64 %f Index: test/CodeGen/X86/funnel-shift.ll =================================================================== --- test/CodeGen/X86/funnel-shift.ll +++ test/CodeGen/X86/funnel-shift.ll @@ -127,35 +127,36 @@ ; X64-AVX2-LABEL: fshl_i37: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: pushq %rbx -; X64-AVX2-NEXT: movq %rdx, %r10 +; X64-AVX2-NEXT: movq %rsi, %r9 ; X64-AVX2-NEXT: movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF +; X64-AVX2-NEXT: andq %r8, %r9 +; X64-AVX2-NEXT: movl $37, %r10d +; X64-AVX2-NEXT: subq %rdx, %r10 +; X64-AVX2-NEXT: movq %rdx, %rsi ; X64-AVX2-NEXT: andq %r8, %rsi -; X64-AVX2-NEXT: movl $37, %r9d -; X64-AVX2-NEXT: subq %rdx, %r9 -; X64-AVX2-NEXT: andq %r8, %r10 ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %r11 # imm = 0xDD67C8A60DD67C8B -; X64-AVX2-NEXT: movq %r10, %rax +; X64-AVX2-NEXT: movq %rsi, %rax ; X64-AVX2-NEXT: mulq %r11 ; X64-AVX2-NEXT: shrq $5, %rdx ; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax ; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax -; X64-AVX2-NEXT: subq %rax, %r10 +; X64-AVX2-NEXT: subq %rax, %rsi ; X64-AVX2-NEXT: movq %rdi, %rbx -; X64-AVX2-NEXT: movl %r10d, %ecx +; X64-AVX2-NEXT: movl %esi, %ecx ; X64-AVX2-NEXT: shlq %cl, %rbx -; X64-AVX2-NEXT: andq %r9, %r8 +; X64-AVX2-NEXT: andq %r10, %r8 ; X64-AVX2-NEXT: movq %r8, %rax ; X64-AVX2-NEXT: mulq %r11 ; X64-AVX2-NEXT: shrq $5, %rdx ; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax ; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax -; X64-AVX2-NEXT: subl %eax, %r9d -; X64-AVX2-NEXT: movl %r9d, %ecx -; X64-AVX2-NEXT: shrq %cl, %rsi -; X64-AVX2-NEXT: orq %rbx, %rsi -; X64-AVX2-NEXT: testq %r10, %r10 -; X64-AVX2-NEXT: cmoveq %rdi, %rsi -; X64-AVX2-NEXT: movq %rsi, %rax +; X64-AVX2-NEXT: subl %eax, %r10d +; X64-AVX2-NEXT: movl %r10d, %ecx +; X64-AVX2-NEXT: shrq %cl, %r9 +; X64-AVX2-NEXT: orq %rbx, %r9 +; X64-AVX2-NEXT: testq %rsi, %rsi +; X64-AVX2-NEXT: cmoveq %rdi, %r9 +; X64-AVX2-NEXT: movq %r9, %rax ; X64-AVX2-NEXT: popq %rbx ; X64-AVX2-NEXT: retq %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) @@ -186,8 +187,8 @@ ; ; X64-AVX2-LABEL: fshl_i32_const_shift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldl $9, %esi, %edi ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shldl $9, %esi, %eax ; X64-AVX2-NEXT: retq %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9) ret i32 %f @@ -205,8 +206,8 @@ ; ; X64-AVX2-LABEL: fshl_i32_const_overshift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldl $9, %esi, %edi ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shldl $9, %esi, %eax ; X64-AVX2-NEXT: retq %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41) ret i32 %f @@ -226,8 +227,8 @@ ; ; X64-AVX2-LABEL: fshl_i64_const_overshift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldq $41, %rsi, %rdi ; X64-AVX2-NEXT: movq %rdi, %rax +; X64-AVX2-NEXT: shldq $41, %rsi, %rax ; X64-AVX2-NEXT: retq %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105) ret i64 %f @@ -275,18 +276,18 @@ ; ; X64-AVX2-LABEL: fshr_i32: ; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: movl %edi, %eax ; X64-AVX2-NEXT: movl $32, %r8d ; X64-AVX2-NEXT: subl %edx, %r8d ; X64-AVX2-NEXT: andl $31, %edx -; X64-AVX2-NEXT: movl %esi, %eax +; X64-AVX2-NEXT: movl %esi, %edi ; X64-AVX2-NEXT: movl %edx, %ecx -; X64-AVX2-NEXT: shrl %cl, %eax +; X64-AVX2-NEXT: shrl %cl, %edi ; X64-AVX2-NEXT: movl %r8d, %ecx -; X64-AVX2-NEXT: shll %cl, %edi -; X64-AVX2-NEXT: orl %eax, %edi +; X64-AVX2-NEXT: shll %cl, %eax +; X64-AVX2-NEXT: orl %edi, %eax ; X64-AVX2-NEXT: testl %edx, %edx -; X64-AVX2-NEXT: cmovel %esi, %edi -; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: cmovel %esi, %eax ; X64-AVX2-NEXT: retq %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) ret i32 %f @@ -361,35 +362,36 @@ ; X64-AVX2-LABEL: fshr_i37: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: pushq %rbx -; X64-AVX2-NEXT: movq %rdx, %r10 -; X64-AVX2-NEXT: movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF +; X64-AVX2-NEXT: movq %rdi, %r8 +; X64-AVX2-NEXT: movabsq $137438953471, %r9 # imm = 0x1FFFFFFFFF ; X64-AVX2-NEXT: movq %rsi, %r11 -; X64-AVX2-NEXT: andq %r8, %r11 -; X64-AVX2-NEXT: movl $37, %r9d -; X64-AVX2-NEXT: subq %rdx, %r9 -; X64-AVX2-NEXT: andq %r8, %r10 +; X64-AVX2-NEXT: andq %r9, %r11 +; X64-AVX2-NEXT: movl $37, %r10d +; X64-AVX2-NEXT: subq %rdx, %r10 +; X64-AVX2-NEXT: movq %rdx, %rdi +; X64-AVX2-NEXT: andq %r9, %rdi ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rbx # imm = 0xDD67C8A60DD67C8B -; X64-AVX2-NEXT: movq %r10, %rax +; X64-AVX2-NEXT: movq %rdi, %rax ; X64-AVX2-NEXT: mulq %rbx ; X64-AVX2-NEXT: shrq $5, %rdx ; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax ; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax -; X64-AVX2-NEXT: subq %rax, %r10 -; X64-AVX2-NEXT: movl %r10d, %ecx +; X64-AVX2-NEXT: subq %rax, %rdi +; X64-AVX2-NEXT: movl %edi, %ecx ; X64-AVX2-NEXT: shrq %cl, %r11 -; X64-AVX2-NEXT: andq %r9, %r8 -; X64-AVX2-NEXT: movq %r8, %rax +; X64-AVX2-NEXT: andq %r10, %r9 +; X64-AVX2-NEXT: movq %r9, %rax ; X64-AVX2-NEXT: mulq %rbx ; X64-AVX2-NEXT: shrq $5, %rdx ; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax ; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax -; X64-AVX2-NEXT: subl %eax, %r9d -; X64-AVX2-NEXT: movl %r9d, %ecx -; X64-AVX2-NEXT: shlq %cl, %rdi -; X64-AVX2-NEXT: orq %r11, %rdi -; X64-AVX2-NEXT: testq %r10, %r10 -; X64-AVX2-NEXT: cmoveq %rsi, %rdi -; X64-AVX2-NEXT: movq %rdi, %rax +; X64-AVX2-NEXT: subl %eax, %r10d +; X64-AVX2-NEXT: movl %r10d, %ecx +; X64-AVX2-NEXT: shlq %cl, %r8 +; X64-AVX2-NEXT: orq %r11, %r8 +; X64-AVX2-NEXT: testq %rdi, %rdi +; X64-AVX2-NEXT: cmoveq %rsi, %r8 +; X64-AVX2-NEXT: movq %r8, %rax ; X64-AVX2-NEXT: popq %rbx ; X64-AVX2-NEXT: retq %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) @@ -420,8 +422,8 @@ ; ; X64-AVX2-LABEL: fshr_i32_const_shift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldl $23, %esi, %edi ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shldl $23, %esi, %eax ; X64-AVX2-NEXT: retq %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9) ret i32 %f @@ -439,8 +441,8 @@ ; ; X64-AVX2-LABEL: fshr_i32_const_overshift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldl $23, %esi, %edi ; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shldl $23, %esi, %eax ; X64-AVX2-NEXT: retq %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41) ret i32 %f @@ -460,8 +462,8 @@ ; ; X64-AVX2-LABEL: fshr_i64_const_overshift: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldq $23, %rsi, %rdi ; X64-AVX2-NEXT: movq %rdi, %rax +; X64-AVX2-NEXT: shldq $23, %rsi, %rax ; X64-AVX2-NEXT: retq %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105) ret i64 %f Index: test/CodeGen/X86/ghc-cc64.ll =================================================================== --- test/CodeGen/X86/ghc-cc64.ll +++ test/CodeGen/X86/ghc-cc64.ll @@ -22,8 +22,8 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rdi, %r13 - ; CHECK-NEXT: movq %rsi, %rbp + ; CHECK: movq %rsi, %rbp + ; CHECK-NEXT: movq %rdi, %r13 ; CHECK-NEXT: callq addtwo %0 = call ghccc i64 @addtwo(i64 %a, i64 %b) ; CHECK: callq foo Index: test/CodeGen/X86/hipe-cc64.ll =================================================================== --- test/CodeGen/X86/hipe-cc64.ll +++ test/CodeGen/X86/hipe-cc64.ll @@ -4,11 +4,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: - ; CHECK: movq %rsi, %rax + ; CHECK: movq %rsi, %rdx ; CHECK-NEXT: movl $8, %ecx ; CHECK-NEXT: movl $9, %r8d ; CHECK-NEXT: movq %rdi, %rsi - ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 Index: test/CodeGen/X86/i128-mul.ll =================================================================== --- test/CodeGen/X86/i128-mul.ll +++ test/CodeGen/X86/i128-mul.ll @@ -336,17 +336,17 @@ ; X64-BMI-NEXT: movq %rcx, %r8 ; X64-BMI-NEXT: movq %rdx, %r9 ; X64-BMI-NEXT: xorl %r10d, %r10d -; X64-BMI-NEXT: xorl %eax, %eax +; X64-BMI-NEXT: xorl %ecx, %ecx ; X64-BMI-NEXT: .p2align 4, 0x90 ; X64-BMI-NEXT: .LBB1_2: # %for.body ; X64-BMI-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-BMI-NEXT: movq %r8, %rdx -; X64-BMI-NEXT: mulxq (%r9,%rax,8), %rcx, %rdx -; X64-BMI-NEXT: addq %r10, %rcx +; X64-BMI-NEXT: mulxq (%r9,%rcx,8), %rax, %rdx +; X64-BMI-NEXT: addq %r10, %rax ; X64-BMI-NEXT: adcq $0, %rdx -; X64-BMI-NEXT: movq %rcx, (%rsi,%rax,8) -; X64-BMI-NEXT: incq %rax -; X64-BMI-NEXT: cmpq %rax, %rdi +; X64-BMI-NEXT: movq %rax, (%rsi,%rcx,8) +; X64-BMI-NEXT: incq %rcx +; X64-BMI-NEXT: cmpq %rcx, %rdi ; X64-BMI-NEXT: movq %rdx, %r10 ; X64-BMI-NEXT: jne .LBB1_2 ; X64-BMI-NEXT: .LBB1_3: # %for.end Index: test/CodeGen/X86/iabs.ll =================================================================== --- test/CodeGen/X86/iabs.ll +++ test/CodeGen/X86/iabs.ll @@ -22,10 +22,11 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: sarb $7, %al -; X64-NEXT: addb %al, %dil -; X64-NEXT: xorb %al, %dil -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sarb $7, %cl +; X64-NEXT: addb %cl, %al +; X64-NEXT: xorb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp1neg = sub i8 0, %a %b = icmp sgt i8 %a, -1 Index: test/CodeGen/X86/imul.ll =================================================================== --- test/CodeGen/X86/imul.ll +++ test/CodeGen/X86/imul.ll @@ -39,8 +39,8 @@ define i32 @mul4096_32(i32 %A) { ; X64-LABEL: mul4096_32: ; X64: # %bb.0: -; X64-NEXT: shll $12, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $12, %eax ; X64-NEXT: retq ; ; X86-LABEL: mul4096_32: @@ -55,8 +55,8 @@ define i64 @mul4096_64(i64 %A) { ; X64-LABEL: mul4096_64: ; X64: # %bb.0: -; X64-NEXT: shlq $12, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq $12, %rax ; X64-NEXT: retq ; ; X86-LABEL: mul4096_64: @@ -73,9 +73,9 @@ define i32 @mulmin4096_32(i32 %A) { ; X64-LABEL: mulmin4096_32: ; X64: # %bb.0: -; X64-NEXT: shll $12, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $12, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X86-LABEL: mulmin4096_32: @@ -91,9 +91,9 @@ define i64 @mulmin4096_64(i64 %A) { ; X64-LABEL: mulmin4096_64: ; X64: # %bb.0: -; X64-NEXT: shlq $12, %rdi -; X64-NEXT: negq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shlq $12, %rax +; X64-NEXT: negq %rax ; X64-NEXT: retq ; ; X86-LABEL: mulmin4096_64: @@ -268,8 +268,8 @@ define i32 @mul4294967295_32(i32 %A) { ; X64-LABEL: mul4294967295_32: ; X64: # %bb.0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X86-LABEL: mul4294967295_32: @@ -284,8 +284,8 @@ define i64 @mul18446744073709551615_64(i64 %A) { ; X64-LABEL: mul18446744073709551615_64: ; X64: # %bb.0: -; X64-NEXT: negq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax ; X64-NEXT: retq ; ; X86-LABEL: mul18446744073709551615_64: @@ -323,9 +323,9 @@ ; X64-LABEL: test1: ; X64: # %bb.0: # %entry ; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $5, %eax -; X64-NEXT: subl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: shll $5, %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; ; X86-LABEL: test1: @@ -412,9 +412,9 @@ ; X64-LABEL: test5: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shlq $5, %rax -; X64-NEXT: subq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: shlq $5, %rcx +; X64-NEXT: subq %rcx, %rax ; X64-NEXT: retq ; ; X86-LABEL: test5: @@ -530,9 +530,9 @@ ; X64-LABEL: testNegOverflow: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shlq $63, %rax -; X64-NEXT: subq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: shlq $63, %rcx +; X64-NEXT: subq %rcx, %rax ; X64-NEXT: retq ; ; X86-LABEL: testNegOverflow: Index: test/CodeGen/X86/legalize-shift-64.ll =================================================================== --- test/CodeGen/X86/legalize-shift-64.ll +++ test/CodeGen/X86/legalize-shift-64.ll @@ -88,6 +88,8 @@ ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -101,12 +103,11 @@ ; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl %edx, %ebx -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %ch, %cl ; CHECK-NEXT: shll %cl, %ebx ; CHECK-NEXT: shldl %cl, %edx, %ebp -; CHECK-NEXT: testb $32, %cl +; CHECK-NEXT: testb $32, %ch ; CHECK-NEXT: je .LBB4_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: movl %ebx, %ebp Index: test/CodeGen/X86/legalize-shl-vec.ll =================================================================== --- test/CodeGen/X86/legalize-shl-vec.ll +++ test/CodeGen/X86/legalize-shl-vec.ll @@ -42,21 +42,21 @@ ; ; X64-LABEL: test_shl: ; X64: # %bb.0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; X64-NEXT: shldq $2, %rax, %rcx -; X64-NEXT: shldq $2, %rdx, %rax -; X64-NEXT: shldq $2, %r9, %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: shldq $2, %rcx, %rdx +; X64-NEXT: shldq $2, %rsi, %rcx +; X64-NEXT: shldq $2, %r9, %rsi ; X64-NEXT: shlq $2, %r9 -; X64-NEXT: movq %rcx, 56(%rdi) -; X64-NEXT: movq %rax, 48(%rdi) -; X64-NEXT: movq %rdx, 40(%rdi) +; X64-NEXT: movq %rdx, 56(%rdi) +; X64-NEXT: movq %rcx, 48(%rdi) +; X64-NEXT: movq %rsi, 40(%rdi) ; X64-NEXT: movq %r9, 32(%rdi) ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, 16(%rdi) ; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %Amt = insertelement <2 x i256> , i256 -1, i32 0 %Out = shl <2 x i256> %In, %Amt @@ -88,7 +88,7 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: shldl $28, %eax, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: shldl $28, %esi, %eax ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: shldl $28, %edi, %esi @@ -101,7 +101,7 @@ ; X32-NEXT: shrl $4, %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %edx, 60(%eax) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl %edx, 56(%eax) ; X32-NEXT: movl (%esp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, 52(%eax) @@ -132,21 +132,21 @@ ; ; X64-LABEL: test_srl: ; X64: # %bb.0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; X64-NEXT: shrdq $4, %rdx, %r9 -; X64-NEXT: shrdq $4, %rax, %rdx -; X64-NEXT: shrdq $4, %rcx, %rax -; X64-NEXT: shrq $4, %rcx -; X64-NEXT: movq %rcx, 56(%rdi) -; X64-NEXT: movq %rax, 48(%rdi) -; X64-NEXT: movq %rdx, 40(%rdi) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: shrdq $4, %rsi, %r9 +; X64-NEXT: shrdq $4, %rcx, %rsi +; X64-NEXT: shrdq $4, %rdx, %rcx +; X64-NEXT: shrq $4, %rdx +; X64-NEXT: movq %rdx, 56(%rdi) +; X64-NEXT: movq %rcx, 48(%rdi) +; X64-NEXT: movq %rsi, 40(%rdi) ; X64-NEXT: movq %r9, 32(%rdi) ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, 16(%rdi) ; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %Amt = insertelement <2 x i256> , i256 -1, i32 0 %Out = lshr <2 x i256> %In, %Amt @@ -178,7 +178,7 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: shldl $26, %eax, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: shldl $26, %esi, %eax ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: shldl $26, %edi, %esi @@ -191,7 +191,7 @@ ; X32-NEXT: sarl $6, %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %edx, 60(%eax) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl %edx, 56(%eax) ; X32-NEXT: movl (%esp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, 52(%eax) @@ -224,23 +224,23 @@ ; ; X64-LABEL: test_sra: ; X64: # %bb.0: -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; X64-NEXT: shrdq $6, %rdx, %r9 -; X64-NEXT: shrdq $6, %rax, %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: shrdq $6, %rsi, %r9 +; X64-NEXT: shrdq $6, %rcx, %rsi ; X64-NEXT: sarq $63, %r8 -; X64-NEXT: shrdq $6, %rcx, %rax -; X64-NEXT: sarq $6, %rcx -; X64-NEXT: movq %rcx, 56(%rdi) -; X64-NEXT: movq %rax, 48(%rdi) -; X64-NEXT: movq %rdx, 40(%rdi) +; X64-NEXT: shrdq $6, %rdx, %rcx +; X64-NEXT: sarq $6, %rdx +; X64-NEXT: movq %rdx, 56(%rdi) +; X64-NEXT: movq %rcx, 48(%rdi) +; X64-NEXT: movq %rsi, 40(%rdi) ; X64-NEXT: movq %r9, 32(%rdi) ; X64-NEXT: movq %r8, 24(%rdi) ; X64-NEXT: movq %r8, 16(%rdi) ; X64-NEXT: movq %r8, 8(%rdi) ; X64-NEXT: movq %r8, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %Amt = insertelement <2 x i256> , i256 -1, i32 0 %Out = ashr <2 x i256> %In, %Amt Index: test/CodeGen/X86/machine-combiner-int.ll =================================================================== --- test/CodeGen/X86/machine-combiner-int.ll +++ test/CodeGen/X86/machine-combiner-int.ll @@ -62,10 +62,11 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ands_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: andb %cl, %dl -; CHECK-NEXT: andb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: andb %dil, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = and i8 %x2, %t0 @@ -78,10 +79,10 @@ define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ands_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: andl %ecx, %edx -; CHECK-NEXT: andl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = and i32 %x2, %t0 @@ -92,10 +93,10 @@ define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ands_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: andq %rcx, %rdx -; CHECK-NEXT: andq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = and i64 %x2, %t0 @@ -109,10 +110,11 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: orb %cl, %dl -; CHECK-NEXT: orb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = or i8 %x2, %t0 @@ -125,10 +127,10 @@ define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: orl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = or i32 %x2, %t0 @@ -139,10 +141,10 @@ define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = or i64 %x2, %t0 @@ -156,10 +158,11 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_xors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: xorb %cl, %dl -; CHECK-NEXT: xorb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: xorb %cl, %al +; CHECK-NEXT: xorb %dil, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = xor i8 %x2, %t0 @@ -172,10 +175,10 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_xors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: xorl %ecx, %edx -; CHECK-NEXT: xorl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = xor i32 %x2, %t0 @@ -186,10 +189,10 @@ define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_xors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: xorq %rcx, %rdx -; CHECK-NEXT: xorq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: xorq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = xor i64 %x2, %t0 Index: test/CodeGen/X86/machine-cp.ll =================================================================== --- test/CodeGen/X86/machine-cp.ll +++ test/CodeGen/X86/machine-cp.ll @@ -103,30 +103,29 @@ ; CHECK: ## %bb.0: ## %bb ; CHECK-NEXT: movaps %xmm3, %xmm9 ; CHECK-NEXT: movaps %xmm2, %xmm8 -; CHECK-NEXT: movaps %xmm1, %xmm6 ; CHECK-NEXT: movaps %xmm0, %xmm7 ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movaps %xmm3, %xmm1 -; CHECK-NEXT: cmpltps %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm1, %xmm4 +; CHECK-NEXT: movaps %xmm3, %xmm2 +; CHECK-NEXT: cmpltps %xmm0, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm4 ; CHECK-NEXT: orps {{.*}}(%rip), %xmm4 ; CHECK-NEXT: movaps %xmm4, %xmm10 -; CHECK-NEXT: andnps %xmm1, %xmm10 -; CHECK-NEXT: movaps %xmm2, %xmm1 -; CHECK-NEXT: cmpltps %xmm0, %xmm1 +; CHECK-NEXT: andnps %xmm2, %xmm10 +; CHECK-NEXT: movaps %xmm8, %xmm5 +; CHECK-NEXT: cmpltps %xmm0, %xmm5 ; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12] -; CHECK-NEXT: movaps %xmm1, %xmm3 -; CHECK-NEXT: orps %xmm11, %xmm3 -; CHECK-NEXT: movaps %xmm3, %xmm14 -; CHECK-NEXT: andnps %xmm1, %xmm14 -; CHECK-NEXT: cvttps2dq %xmm6, %xmm12 -; CHECK-NEXT: cmpltps %xmm0, %xmm6 +; CHECK-NEXT: movaps %xmm5, %xmm2 +; CHECK-NEXT: orps %xmm11, %xmm2 +; CHECK-NEXT: movaps %xmm2, %xmm14 +; CHECK-NEXT: andnps %xmm5, %xmm14 +; CHECK-NEXT: cvttps2dq %xmm1, %xmm12 +; CHECK-NEXT: cmpltps %xmm0, %xmm1 ; CHECK-NEXT: movaps {{.*#+}} xmm13 = [5,6,7,8] -; CHECK-NEXT: movaps %xmm6, %xmm2 -; CHECK-NEXT: orps %xmm13, %xmm2 -; CHECK-NEXT: movaps %xmm2, %xmm5 -; CHECK-NEXT: andnps %xmm6, %xmm5 -; CHECK-NEXT: cvttps2dq %xmm7, %xmm6 +; CHECK-NEXT: movaps %xmm1, %xmm6 +; CHECK-NEXT: orps %xmm13, %xmm6 +; CHECK-NEXT: movaps %xmm6, %xmm5 +; CHECK-NEXT: andnps %xmm1, %xmm5 +; CHECK-NEXT: cvttps2dq %xmm7, %xmm3 ; CHECK-NEXT: cmpltps %xmm0, %xmm7 ; CHECK-NEXT: movaps {{.*#+}} xmm15 = [1,2,3,4] ; CHECK-NEXT: movaps %xmm7, %xmm0 @@ -134,30 +133,29 @@ ; CHECK-NEXT: movaps %xmm0, %xmm1 ; CHECK-NEXT: andnps %xmm7, %xmm1 ; CHECK-NEXT: andps %xmm15, %xmm0 -; CHECK-NEXT: cvtdq2ps %xmm6, %xmm6 -; CHECK-NEXT: andps %xmm6, %xmm0 -; CHECK-NEXT: movaps {{.*#+}} xmm6 = [1,1,1,1] -; CHECK-NEXT: andps %xmm6, %xmm1 +; CHECK-NEXT: cvtdq2ps %xmm3, %xmm3 +; CHECK-NEXT: andps %xmm3, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1] +; CHECK-NEXT: andps %xmm3, %xmm1 ; CHECK-NEXT: orps %xmm1, %xmm0 -; CHECK-NEXT: andps %xmm13, %xmm2 +; CHECK-NEXT: andps %xmm13, %xmm6 ; CHECK-NEXT: cvtdq2ps %xmm12, %xmm1 -; CHECK-NEXT: andps %xmm1, %xmm2 -; CHECK-NEXT: andps %xmm6, %xmm5 -; CHECK-NEXT: orps %xmm5, %xmm2 -; CHECK-NEXT: andps %xmm11, %xmm3 +; CHECK-NEXT: andps %xmm1, %xmm6 +; CHECK-NEXT: andps %xmm3, %xmm5 +; CHECK-NEXT: orps %xmm5, %xmm6 +; CHECK-NEXT: andps %xmm11, %xmm2 ; CHECK-NEXT: cvttps2dq %xmm8, %xmm1 ; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1 -; CHECK-NEXT: andps %xmm1, %xmm3 -; CHECK-NEXT: andps %xmm6, %xmm14 -; CHECK-NEXT: orps %xmm14, %xmm3 -; CHECK-NEXT: andps %xmm6, %xmm10 +; CHECK-NEXT: andps %xmm1, %xmm2 +; CHECK-NEXT: andps %xmm3, %xmm14 +; CHECK-NEXT: orps %xmm14, %xmm2 +; CHECK-NEXT: andps %xmm3, %xmm10 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm4 ; CHECK-NEXT: cvttps2dq %xmm9, %xmm1 ; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1 ; CHECK-NEXT: andps %xmm1, %xmm4 ; CHECK-NEXT: orps %xmm10, %xmm4 -; CHECK-NEXT: movaps %xmm2, %xmm1 -; CHECK-NEXT: movaps %xmm3, %xmm2 +; CHECK-NEXT: movaps %xmm6, %xmm1 ; CHECK-NEXT: movaps %xmm4, %xmm3 ; CHECK-NEXT: retq bb: Index: test/CodeGen/X86/machine-cse.ll =================================================================== --- test/CodeGen/X86/machine-cse.ll +++ test/CodeGen/X86/machine-cse.ll @@ -136,21 +136,21 @@ ; CHECK-NEXT: testq %rcx, %rcx ; CHECK-NEXT: je .LBB3_4 ; CHECK-NEXT: # %bb.1: # %preheader -; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movzbl %dl, %edx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB3_2: # %do.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl %eax, %esi +; CHECK-NEXT: cmpl %edx, %esi ; CHECK-NEXT: je .LBB3_5 ; CHECK-NEXT: # %bb.3: # %do.cond ; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: incq %rdi +; CHECK-NEXT: incq %rax ; CHECK-NEXT: decq %rcx ; CHECK-NEXT: jne .LBB3_2 ; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: .LBB3_5: # %return -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %cmp = icmp eq i64 %n, 0 Index: test/CodeGen/X86/madd.ll =================================================================== --- test/CodeGen/X86/madd.ll +++ test/CodeGen/X86/madd.ll @@ -2225,6 +2225,7 @@ define <32 x i32> @jumbled_indices32(<64 x i16> %A, <64 x i16> %B) { ; SSE2-LABEL: jumbled_indices32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm2 @@ -2241,7 +2242,6 @@ ; SSE2-NEXT: movdqa %xmm2, 32(%rdi) ; SSE2-NEXT: movdqa %xmm1, 16(%rdi) ; SSE2-NEXT: movdqa %xmm0, (%rdi) -; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: retq ; ; AVX1-LABEL: jumbled_indices32: @@ -2443,6 +2443,7 @@ define <32 x i32> @pmaddwd_1024(<64 x i16>* %Aptr, <64 x i16>* %Bptr) { ; SSE2-LABEL: pmaddwd_1024: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movdqa 112(%rsi), %xmm0 ; SSE2-NEXT: movdqa 96(%rsi), %xmm1 ; SSE2-NEXT: movdqa 80(%rsi), %xmm2 @@ -2467,7 +2468,6 @@ ; SSE2-NEXT: movdqa %xmm6, 32(%rdi) ; SSE2-NEXT: movdqa %xmm5, 16(%rdi) ; SSE2-NEXT: movdqa %xmm4, (%rdi) -; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: retq ; ; AVX1-LABEL: pmaddwd_1024: Index: test/CodeGen/X86/mask-negated-bool.ll =================================================================== --- test/CodeGen/X86/mask-negated-bool.ll +++ test/CodeGen/X86/mask-negated-bool.ll @@ -4,8 +4,8 @@ define i32 @mask_negated_zext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_zext_bool1: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = zext i1 %x to i32 %neg = sub i32 0, %ext @@ -38,8 +38,8 @@ define i32 @mask_negated_sext_bool1(i1 %x) { ; CHECK-LABEL: mask_negated_sext_bool1: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %ext = sext i1 %x to i32 %neg = sub i32 0, %ext Index: test/CodeGen/X86/misched-matmul.ll =================================================================== --- test/CodeGen/X86/misched-matmul.ll +++ test/CodeGen/X86/misched-matmul.ll @@ -10,7 +10,7 @@ ; more complex cases. ; ; CHECK: @wrap_mul4 -; CHECK: 23 regalloc - Number of spills inserted +; CHECK: 25 regalloc - Number of spills inserted define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 { entry: Index: test/CodeGen/X86/mul-constant-i16.ll =================================================================== --- test/CodeGen/X86/mul-constant-i16.ll +++ test/CodeGen/X86/mul-constant-i16.ll @@ -11,6 +11,7 @@ ; X64-LABEL: test_mul_by_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 1 ret i16 %mul @@ -297,8 +298,9 @@ ; ; X64-LABEL: test_mul_by_16: ; X64: # %bb.0: -; X64-NEXT: shll $4, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $4, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 16 ret i16 %mul @@ -632,8 +634,9 @@ ; ; X64-LABEL: test_mul_by_32: ; X64: # %bb.0: -; X64-NEXT: shll $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 32 ret i16 %mul Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -787,14 +787,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -805,26 +805,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $4, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 16 ret i32 %mul @@ -1633,14 +1633,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1651,26 +1651,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00] ; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shll $5, %eax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 32 ret i32 %mul @@ -2200,18 +2200,18 @@ ; ; X64-HSW-LABEL: mul_neg_fold: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %eax, %esi # sched: [1:0.25] ; X64-HSW-NEXT: movl %esi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] +; X64-HSW-NEXT: subl %ecx, %eax # sched: [1:0.25] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: mul_neg_fold: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: subl %eax, %esi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [2:1.00] ; X64-JAG-NEXT: movl %esi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %ecx, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: mul_neg_fold: @@ -2235,9 +2235,9 @@ ; X64-SLM-LABEL: mul_neg_fold: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] -; X64-SLM-NEXT: subl %eax, %esi # sched: [1:0.50] ; X64-SLM-NEXT: movl %esi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:1.00] +; X64-SLM-NEXT: subl %ecx, %eax # sched: [1:0.50] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: mul_neg_fold: Index: test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- test/CodeGen/X86/mul-constant-i64.ll +++ test/CodeGen/X86/mul-constant-i64.ll @@ -811,14 +811,14 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_16: @@ -831,26 +831,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $4, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 16 ret i64 %mul @@ -1716,14 +1716,14 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_32: @@ -1736,26 +1736,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: retq # sched: [7:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] ; JAG-NOOPT-NEXT: retq # sched: [4:1.00] ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00] ; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] ; X64-SLM-NEXT: retq # sched: [4:1.00] ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00] ; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: shlq $5, %rax # sched: [1:1.00] ; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 32 ret i64 %mul Index: test/CodeGen/X86/mul-i1024.ll =================================================================== --- test/CodeGen/X86/mul-i1024.ll +++ test/CodeGen/X86/mul-i1024.ll @@ -774,14 +774,15 @@ ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl %ebp, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload -; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edx, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 88(%eax), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -789,34 +790,35 @@ ; X32-NEXT: mull %edx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %esi -; X32-NEXT: addl %eax, %esi -; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: addl %ecx, %esi -; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl %ebp, %ebx +; X32-NEXT: addl %eax, %ebx +; X32-NEXT: adcl %edx, %esi +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: adcl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: adcl %ebx, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: addl %edi, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 84(%eax), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -860,34 +862,35 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 68(%ecx), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl 68(%ebp), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: addl %ebx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %edi, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 72(%eax), %eax +; X32-NEXT: movl 72(%ebp), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %edx +; X32-NEXT: movl %ebx, %edx +; X32-NEXT: movl %ebx, %ebp ; X32-NEXT: addl %eax, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl %esi, %ebx @@ -1167,14 +1170,13 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %ebp @@ -1182,7 +1184,6 @@ ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -1191,17 +1192,17 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl %ebx, %edx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %ebx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %edx, %eax @@ -1209,12 +1210,12 @@ ; X32-NEXT: setb %dl ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: movzbl %dl, %eax ; X32-NEXT: adcl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %esi, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -1437,29 +1438,29 @@ ; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %esi, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload @@ -1477,7 +1478,7 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -2431,6 +2432,7 @@ ; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %ebx, %edi ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -2449,27 +2451,25 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -2647,30 +2647,30 @@ ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload @@ -2682,8 +2682,7 @@ ; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx @@ -2694,7 +2693,7 @@ ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -3360,29 +3359,29 @@ ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: movzbl %bl, %eax @@ -3479,20 +3478,20 @@ ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp @@ -3513,30 +3512,30 @@ ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload @@ -4603,36 +4602,37 @@ ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: adcl %edx, %esi ; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl %ebx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax @@ -4666,8 +4666,7 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload @@ -4735,31 +4734,30 @@ ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, %ebp ; X32-NEXT: mull %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi +; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi @@ -4772,7 +4770,7 @@ ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -4796,8 +4794,8 @@ ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebp, %edi +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload @@ -4817,7 +4815,7 @@ ; X32-NEXT: setb %bl ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl %esi, %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -4836,41 +4834,41 @@ ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %edi +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ebp, %edx +; X32-NEXT: addl %edi, %edx ; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %edi, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %edi, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebp, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl %edi, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload @@ -4880,7 +4878,7 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx @@ -5647,7 +5645,6 @@ ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, %esi ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi @@ -5660,19 +5657,19 @@ ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx @@ -5818,7 +5815,6 @@ ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %esi ; X32-NEXT: mull %ebp ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx @@ -5831,19 +5827,19 @@ ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp @@ -6003,20 +5999,19 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movl 104(%ebp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 104(%eax), %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax @@ -6029,7 +6024,8 @@ ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl 108(%ebp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 108(%eax), %esi ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -6057,8 +6053,7 @@ ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi @@ -6110,12 +6105,11 @@ ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: imull %eax, %ecx +; X32-NEXT: imull %eax, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: addl %ebx, %edx ; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl %esi, %ebx @@ -6161,12 +6155,11 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl 124(%edx), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 124(%edi), %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl 120(%edx), %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl 120(%edi), %esi ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx @@ -6230,25 +6223,25 @@ ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %ebp +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: mull %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx @@ -6264,26 +6257,26 @@ ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, %ebp -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx @@ -6321,51 +6314,51 @@ ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebx, %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebp, %edi ; X32-NEXT: mull %ebp -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb %cl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -6375,20 +6368,19 @@ ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: imull %eax, %ecx +; X32-NEXT: imull %eax, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: addl %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: imull %ebp, %esi ; X32-NEXT: addl %edx, %esi @@ -6533,12 +6525,12 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -6572,12 +6564,12 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -6704,6 +6696,7 @@ ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: adcq %rdi, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: movzbl %bl, %ebx @@ -6713,17 +6706,16 @@ ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: addq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: movq %rdi, (%rsp) # 8-byte Spill -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rbp, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbx, %rax -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rbx, %r14 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %ebp, %ebp @@ -6733,7 +6725,7 @@ ; X64-NEXT: movq 8(%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rbp -; X64-NEXT: xorl %r11d, %r11d +; X64-NEXT: xorl %r9d, %r9d ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rcx, %r15 ; X64-NEXT: movq %rdx, %rbp @@ -6749,22 +6741,21 @@ ; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %r14 -; X64-NEXT: addq %rax, %r14 -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq %rbp, %r14 -; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: addq %rax, %r9 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: adcq %rdx, %rax +; X64-NEXT: addq %rbp, %r9 +; X64-NEXT: adcq %rbx, %rax +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %r9, %rax -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rcx, %r8 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%r10), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %r8d, %r8d @@ -6772,44 +6763,44 @@ ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 32(%r13), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r8 -; X64-NEXT: xorl %r8d, %r8d +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: movq %rbx, %r8 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rdx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %r11, %rax ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdi, %r11 ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: adcq %r15, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r14, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: adcq %r11, %rax +; X64-NEXT: movq %r12, %rax +; X64-NEXT: adcq %r9, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r11, %rdi +; X64-NEXT: adcq %rbp, %r14 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rbp, %rdi ; X64-NEXT: movq 8(%r10), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rsi, %r11 -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %rsi, %r12 +; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rbx, %r11 +; X64-NEXT: addq %rbx, %r12 ; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb %bl @@ -6818,92 +6809,91 @@ ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%r10), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: addq %rax, %r8 +; X64-NEXT: movq %r8, %rcx +; X64-NEXT: addq %rax, %rcx ; X64-NEXT: movq %rsi, %r10 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: addq %rbp, %r8 -; X64-NEXT: movq %r8, %rax +; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r10 -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r9, %rdx +; X64-NEXT: movq %r8, %rdx +; X64-NEXT: movq %r8, %r14 +; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill +; X64-NEXT: addq %r11, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r11, %r8 -; X64-NEXT: adcq %r11, %r15 +; X64-NEXT: movq %r12, %rsi +; X64-NEXT: adcq %r12, %r15 ; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: adcq %rcx, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: adcq %r10, %rdi ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq 40(%rsi), %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq 40(%rdi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: xorl %r14d, %r14d -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: addq %r9, %rdi +; X64-NEXT: xorl %r9d, %r9d +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %r13, %rdi -; X64-NEXT: adcq %r9, %rbp +; X64-NEXT: addq %r13, %rcx +; X64-NEXT: adcq %r11, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp -; X64-NEXT: movzbl %bl, %r11d -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: movq 48(%rsi), %rax +; X64-NEXT: movzbl %bl, %ebx +; X64-NEXT: adcq %rdx, %rbx +; X64-NEXT: movq 48(%rdi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r13, %rbx -; X64-NEXT: addq %rax, %rbx -; X64-NEXT: movq %r9, %rsi -; X64-NEXT: adcq %rdx, %rsi -; X64-NEXT: addq %rbp, %rbx -; X64-NEXT: adcq %r11, %rsi +; X64-NEXT: movq %r13, %r12 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: movq %r11, %rdi +; X64-NEXT: adcq %rdx, %rdi +; X64-NEXT: addq %rbp, %r12 +; X64-NEXT: adcq %rbx, %rdi ; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r13, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rdi, %r8 +; X64-NEXT: addq %r13, %r14 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rcx, %rsi +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r12, %r8 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbx, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r10 +; X64-NEXT: adcq %rdi, %r10 ; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, %rax ; X64-NEXT: addq %r13, %rax -; X64-NEXT: movq (%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: adcq %r11, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, %r10 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 56(%rax), %r11 ; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r10 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx @@ -6918,7 +6908,7 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: movq %rdi, %r13 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx @@ -6930,26 +6920,26 @@ ; X64-NEXT: adcq %rdx, %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rcx, %rsi @@ -6957,49 +6947,47 @@ ; X64-NEXT: adcq %rax, %r13 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: addq %r14, %rsi ; X64-NEXT: adcq %r8, %r13 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r10, %rbx -; X64-NEXT: movq %r10, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %r11, %rbx +; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq 24(%rax), %rcx +; X64-NEXT: movq 24(%rax), %r9 ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbp, %r8 ; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: setb %bl +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %dil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %rbp -; X64-NEXT: movq (%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: adcq %r9, %rbx +; X64-NEXT: movq %r10, %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: addq %r11, %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: adcq %r10, %rbx ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: addq %rsi, %r10 -; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rsi, %r14 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r13, %r8 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rbp @@ -7009,76 +6997,74 @@ ; X64-NEXT: setb %r15b ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r11, %rsi -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r11, %rdi +; X64-NEXT: addq %r14, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rdi, %r14 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %r12 +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r9, %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: addq %r14, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: adcq %r9, %r14 +; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: adcq %r10, %r9 ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: adcq %rdx, %r9 ; X64-NEXT: addq %rbp, %r13 -; X64-NEXT: adcq %rbx, %r11 +; X64-NEXT: adcq %rbx, %r14 ; X64-NEXT: movzbl %r15b, %eax ; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %r14 +; X64-NEXT: adcq $0, %r9 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 24(%rax), %rcx ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rbx, %rbp ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rbp, %r15 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx @@ -7090,19 +7076,19 @@ ; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -7110,7 +7096,7 @@ ; X64-NEXT: setb %cl ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq %rsi, %rbp -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx @@ -7122,11 +7108,11 @@ ; X64-NEXT: adcq %r15, %rsi ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r14 ; X64-NEXT: mulq %rdi @@ -7135,11 +7121,11 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbp, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil ; X64-NEXT: movq %r14, %rax @@ -7147,7 +7133,7 @@ ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: addq %r13, %rdi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload @@ -7155,65 +7141,63 @@ ; X64-NEXT: adcq %r14, %rbp ; X64-NEXT: addq %rax, %rdi ; X64-NEXT: adcq %rdx, %rbp -; X64-NEXT: addq %rbx, %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rsi, %r11 +; X64-NEXT: addq %rbx, %r11 ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rsi, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %r8, %rdi ; X64-NEXT: adcq %r10, %rbp -; X64-NEXT: setb %r9b +; X64-NEXT: setb %r10b ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r10, %rbx +; X64-NEXT: addq %r8, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rbx, %r15 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rcx -; X64-NEXT: addq %r13, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: movq %rbx, %rsi -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: adcq %r14, %rsi -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: movq %r9, %r15 +; X64-NEXT: addq %r13, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %r13 +; X64-NEXT: adcq %r14, %r13 +; X64-NEXT: addq %rax, %r15 +; X64-NEXT: adcq %rdx, %r13 ; X64-NEXT: addq %rdi, %r11 -; X64-NEXT: adcq %rbp, %r15 -; X64-NEXT: movzbl %r9b, %eax -; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: adcq %rbp, %r8 +; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq $0, %r13 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill @@ -7224,104 +7208,106 @@ ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r11 +; X64-NEXT: movq %rcx, %rbp ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %rsi, %rcx ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: setb %cl -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq %r10, %r9 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload ; X64-NEXT: movq %r12, %r10 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r9 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r12, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rcx, %r12 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdi, %rbp +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: adcq %rax, %r15 +; X64-NEXT: adcq %rax, %rdi ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: addq %r14, %rbx -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: adcq %r8, %rdi ; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %rbp, %rsi -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %r12, %r11 +; X64-NEXT: movq %r12, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r8 ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r14, %rcx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq 56(%rax), %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq 56(%rax), %rsi +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rcx, %r14 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %cl ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rcx +; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: addq %r8, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; X64-NEXT: adcq %r13, %rsi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: adcq %r11, %rsi ; X64-NEXT: addq %rax, %rcx ; X64-NEXT: adcq %rdx, %rsi ; X64-NEXT: addq %rbx, %r12 -; X64-NEXT: adcq %r15, %r14 +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rdi, %r14 ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: addq %r9, %rcx @@ -7336,69 +7322,65 @@ ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r9, %rbx -; X64-NEXT: adcq $0, %r15 +; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq %r8, %rdi -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rbx, %r8 -; X64-NEXT: adcq %r15, %r9 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: adcq %r12, %r9 ; X64-NEXT: setb %bl ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %r9, %rax ; X64-NEXT: movzbl %bl, %edi ; X64-NEXT: adcq %rdi, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: addq %r11, %r15 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: adcq %r13, %rbp -; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %rdx, %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: addq %r8, %r12 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: adcq %r11, %r10 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: adcq %rsi, %r8 +; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r15 -; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq %rax, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: adcq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx -; X64-NEXT: adcq $0, %r8 -; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %rbp +; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload ; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r11, %rbx @@ -7407,176 +7389,171 @@ ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rbx, %r9 ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 -; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: adcq %rdx, %r15 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r13 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rcx, %r14 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: addq %rcx, %r13 ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NEXT: adcq %r12, %r11 +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: adcq %r9, %r11 ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r13, %rax +; X64-NEXT: adcq $0, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r14, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r15 ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %r8, %rcx +; X64-NEXT: addq %rbx, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; X64-NEXT: mulq %r13 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: setb %cl -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %r13, %r9 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; X64-NEXT: addq %r13, %rsi -; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: addq %rax, %r14 ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %r13, %r9 +; X64-NEXT: movq %r9, %r13 ; X64-NEXT: adcq %r11, %r8 -; X64-NEXT: movq %r8, %r11 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: movq %r8, %r15 +; X64-NEXT: adcq $0, %r14 ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rdi, %r10 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rdi, %r9 ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %bl +; X64-NEXT: setb %sil ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %bl, %ecx +; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: addq %r13, %rsi +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: adcq %r14, %rcx +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: addq %r14, %r11 +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload ; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; X64-NEXT: adcq %r15, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %r11 -; X64-NEXT: movq %r11, (%rsp) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq %r12, %r13 +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r10, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload -; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rax, %r11 +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq $0, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: movq 64(%r9), %r11 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq 64(%rcx), %r11 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq 72(%rcx), %rsi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq 72(%r9), %rsi +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movq %rdx, %rsi @@ -7584,9 +7561,9 @@ ; X64-NEXT: addq %rbx, %r8 ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %bl -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r10 +; X64-NEXT: movq %rcx, %r13 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi @@ -7598,141 +7575,138 @@ ; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: addq %rax, %r12 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: addq %rax, %r10 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: adcq %rdx, %r15 -; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: addq %rdi, %r10 ; X64-NEXT: adcq %rcx, %r15 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %rax ; X64-NEXT: movq %r11, %rsi ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %r11, %rdi ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, %r11 +; X64-NEXT: mulq %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; X64-NEXT: adcq %r13, %r14 ; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %r14 -; X64-NEXT: addq %r13, %rbx +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq %r8, %r14 -; X64-NEXT: adcq $0, %r12 +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq 80(%rbp), %rdi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq 80(%r9), %rdi +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r8, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq 88(%rbp), %r10 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq 88(%r9), %r9 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: setb %r11b -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %r12b +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rbp, %rsi -; X64-NEXT: movzbl %r11b, %eax +; X64-NEXT: movzbl %r12b, %eax ; X64-NEXT: adcq %rax, %rcx ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: addq %rax, %rbp +; X64-NEXT: movq %rdx, %r12 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rsi, %rbp -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: addq %rbx, %r13 -; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r11, %rax +; X64-NEXT: adcq %rdx, %r13 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: adcq %rcx, %r13 +; X64-NEXT: addq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %rbp ; X64-NEXT: adcq $0, %rax -; X64-NEXT: addq %r12, %rbp -; X64-NEXT: movq %rbp, %r8 -; X64-NEXT: adcq %r15, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: adcq $0, %r13 +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: adcq %r15, %r13 ; X64-NEXT: setb %r14b -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %sil -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: addq %r9, %rsi +; X64-NEXT: addq %r11, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq %r12, %rcx ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r8, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r8, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r13, %rbp +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movzbl %r14b, %eax ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %r10 -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: imulq %rax, %r9 +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r10, %rdx +; X64-NEXT: addq %r9, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: imulq %rbp, %rdi ; X64-NEXT: addq %rdx, %rdi @@ -7752,11 +7726,11 @@ ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %rdi -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r14 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx @@ -7777,12 +7751,11 @@ ; X64-NEXT: adcq %rax, %r12 ; X64-NEXT: addq %r9, %r13 ; X64-NEXT: adcq %r8, %r12 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq 120(%rdx), %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq 120(%rbp), %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 112(%rdx), %rsi -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 112(%rbp), %rsi ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r11 @@ -7840,46 +7813,45 @@ ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq 80(%rsi), %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: movq 80(%r9), %rsi +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq 88(%rsi), %rax -; X64-NEXT: movq %rsi, %r9 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq 88(%r9), %r8 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r8, %rbx +; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rbx, %r14 ; X64-NEXT: adcq %rbp, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: setb %r10b +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %rbp -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: movq (%rsp), %r12 # 8-byte Reload ; X64-NEXT: addq %r12, %rsi ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload @@ -7891,8 +7863,8 @@ ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 72(%r9), %r9 -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq 72(%r9), %rdi +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx @@ -7905,8 +7877,7 @@ ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %r11b -; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rbp @@ -7924,20 +7895,20 @@ ; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r8 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill ; X64-NEXT: adcq %r14, %r8 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp @@ -7950,74 +7921,73 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %r15 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; X64-NEXT: adcq %r13, %r11 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: addq %r9, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: adcq %r8, %r11 ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq (%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, (%rsp) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: addq %rsi, %r15 ; X64-NEXT: adcq %r10, %r11 -; X64-NEXT: setb %r10b -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %r12 +; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: mulq %r14 +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: adcq %rdi, %rcx -; X64-NEXT: setb %r8b -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %r8b, %ecx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: adcq %rdi, %r10 +; X64-NEXT: setb %bl +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: addq %r10, %rax +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: addq %r14, %rsi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: addq %r9, %rbx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: adcq %r13, %rcx -; X64-NEXT: addq %rax, %rsi +; X64-NEXT: adcq %r8, %rcx +; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r15, %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r11, %rbx +; X64-NEXT: addq %r15, %rbp +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r11, %r12 +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; X64-NEXT: adcq %rax, %rbx ; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq 96(%rbp), %rcx ; X64-NEXT: imulq %rcx, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %r12, %rsi -; X64-NEXT: mulq %r12 +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rdi, %rdx ; X64-NEXT: movq 104(%rbp), %r8 @@ -8067,32 +8037,31 @@ ; X64-NEXT: addq %r10, %rbp ; X64-NEXT: adcq %rdi, %rbx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %rsi -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rax, %r8 +; X64-NEXT: imulq %r13, %rsi +; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: imulq %r11, %rcx -; X64-NEXT: addq %rdx, %rcx -; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: imulq %r11, %r8 +; X64-NEXT: addq %rdx, %r8 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: imulq %r15, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: mulq %r14 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %r14, %rax +; X64-NEXT: movq %r14, %rax +; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax -; X64-NEXT: addq %r8, %r10 -; X64-NEXT: adcq %r9, %rax +; X64-NEXT: addq %r9, %r10 +; X64-NEXT: adcq %r8, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %r14 ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r8 @@ -8128,7 +8097,7 @@ ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq (%rsp), %rbp # 8-byte Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload @@ -8141,7 +8110,7 @@ ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, %r10 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; X64-NEXT: adcq (%rsp), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload Index: test/CodeGen/X86/mul-i256.ll =================================================================== --- test/CodeGen/X86/mul-i256.ll +++ test/CodeGen/X86/mul-i256.ll @@ -25,15 +25,15 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 12(%ecx), %ebp ; X32-NEXT: movl 8(%ecx), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl (%eax), %ebx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx @@ -44,60 +44,60 @@ ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: xorl %edx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, %esi ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %esi, %eax ; X32-NEXT: adcl %ebp, %edx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl (%esi), %ebp ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl 4(%esi), %esi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax @@ -107,84 +107,84 @@ ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl 8(%eax), %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 8(%edi), %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %esi, %ecx ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl 12(%ecx), %ecx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl 12(%edi), %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %ecx +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebp, %edi -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %edi, %ebp -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %ebx, %edi ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb (%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -192,80 +192,80 @@ ; X32-NEXT: addl %ebx, %ebp ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 16(%ecx), %esi ; X32-NEXT: imull %esi, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ebx, %edx ; X32-NEXT: movl 20(%ecx), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: imull %eax, %edi ; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 24(%ecx), %eax ; X32-NEXT: movl %ecx, %ebp ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: imull %ecx, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: addl %edi, %edx ; X32-NEXT: movl 28(%ebp), %ebp ; X32-NEXT: imull %ebx, %ebp ; X32-NEXT: addl %edx, %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %edx, (%esp) # 4-byte Folded Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ebx, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx @@ -273,37 +273,37 @@ ; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: adcl %ebp, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl 28(%ebx), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: imull %esi, %ecx ; X32-NEXT: movl 24(%ebx), %edi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %edi ; X32-NEXT: movl 16(%ebx), %ebp ; X32-NEXT: movl 20(%ebx), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: imull %ebp, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx @@ -311,38 +311,38 @@ ; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: adcl %ecx, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, (%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 4(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 8(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 12(%ecx) ; X32-NEXT: movl %ebx, 16(%ecx) ; X32-NEXT: movl %esi, 20(%ecx) Index: test/CodeGen/X86/mul-i512.ll =================================================================== --- test/CodeGen/X86/mul-i512.ll +++ test/CodeGen/X86/mul-i512.ll @@ -12,9 +12,9 @@ ; X32-NEXT: subl $244, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 20(%ecx), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 16(%ecx), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %ebp ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx @@ -27,37 +27,37 @@ ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %esi, %edi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %ebx ; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl 24(%ebp), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %edi ; X32-NEXT: addl %ebx, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 4(%ecx), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %esi ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebx @@ -65,73 +65,73 @@ ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %ebx ; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: movl 8(%esi), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %esi ; X32-NEXT: addl %eax, %esi ; X32-NEXT: movl %edi, %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ecx, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %ebp, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ebp, %ebp ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl %eax, %edx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 16(%eax), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebp -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %edi ; X32-NEXT: movl %ecx, %ebp -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %eax, %edi ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 4(%eax), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edi, %edi ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %esi @@ -139,107 +139,107 @@ ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %ebp, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %ebx ; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 8(%eax), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %edi ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl 20(%esi), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %ebx ; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: movl 24(%esi), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %esi ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl %edx, %edi ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X32-NEXT: adcl %ebp, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %esi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax @@ -247,54 +247,54 @@ ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %ebp ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -303,154 +303,154 @@ ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 12(%eax), %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: mull (%esp) # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %ebx, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ebp, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 12(%eax), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %edi ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi @@ -458,93 +458,93 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %ebp, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebp ; X32-NEXT: adcl $0, %edi -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -552,11 +552,11 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl %ebp, %eax @@ -564,21 +564,21 @@ ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -586,137 +586,137 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %ebp ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ebp, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx @@ -724,11 +724,11 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %ebp, %eax @@ -736,21 +736,21 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -758,33 +758,33 @@ ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl %ebp, %edi ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi @@ -792,101 +792,101 @@ ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %ebx, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 32(%ecx), %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -898,10 +898,10 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl %ebp, %eax @@ -911,27 +911,27 @@ ; X32-NEXT: addl %esi, %ebp ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: addl %ebp, %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl %ebx, %esi ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -939,102 +939,101 @@ ; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, %esi ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 40(%eax), %ebp -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl 44(%ebx), %ebx -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 44(%eax), %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %edi, %esi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ecx ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax @@ -1042,104 +1041,103 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ebp, %edx -; X32-NEXT: imull {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl %esi, %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, %edi ; X32-NEXT: adcl %ebp, %esi -; X32-NEXT: movl %esi, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl 60(%edx), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 60(%edi), %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl 56(%edx), %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl 56(%edi), %esi ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl 48(%edi), %ebx ; X32-NEXT: movl 52(%edi), %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %edi ; X32-NEXT: imull %ebp, %edi -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp @@ -1147,98 +1145,98 @@ ; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl %ebp, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, %ecx ; X32-NEXT: movl 40(%esi), %ebx ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl 44(%ecx), %ecx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl 32(%esi), %edi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 36(%esi), %esi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax @@ -1248,30 +1246,30 @@ ; X32-NEXT: addl %edi, %esi ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl %ebp, %eax -; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -1279,48 +1277,48 @@ ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: adcl %esi, %edi -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, (%esp) # 4-byte Folded Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx @@ -1328,193 +1326,193 @@ ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 48(%ecx), %ebp ; X32-NEXT: imull %ebp, %ebx ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ebx, %edx ; X32-NEXT: movl 52(%ecx), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: imull %eax, %edi ; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 56(%ecx), %eax ; X32-NEXT: movl %ecx, %ebx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: imull %esi, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %edi, %edx ; X32-NEXT: movl 60(%ebx), %ebx ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: imull %ecx, %ebx ; X32-NEXT: addl %edx, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edx -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: imull %ebp, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %edi, %edx -; X32-NEXT: imull {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: imull %edi, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %ebx -; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, (%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 4(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 8(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 12(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 16(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 20(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 24(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 28(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 32(%ecx) -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 36(%ecx) -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, 40(%ecx) ; X32-NEXT: movl %esi, 44(%ecx) ; X32-NEXT: movl %edx, 48(%ecx) @@ -1540,7 +1538,7 @@ ; X64-NEXT: movq %rdx, (%rsp) # 8-byte Spill ; X64-NEXT: movq 24(%rdi), %r11 ; X64-NEXT: movq 16(%rdi), %r15 -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: movq 8(%rsi), %rbp ; X64-NEXT: movq %r15, %rax @@ -1549,7 +1547,7 @@ ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %r10 ; X64-NEXT: movq %rdx, %rbx @@ -1557,7 +1555,7 @@ ; X64-NEXT: addq %r9, %rsi ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %r15, %rax -; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r9 @@ -1568,37 +1566,37 @@ ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rbp, %r14 -; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: movq %r10, %rbx -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: addq %r10, %r15 ; X64-NEXT: adcq %r13, %rdx ; X64-NEXT: addq %rbp, %r15 ; X64-NEXT: adcq %rsi, %rdx ; X64-NEXT: movq %rdx, %r12 -; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%rdi), %rcx ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq 8(%rdi), %rdi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rsi @@ -1608,7 +1606,7 @@ ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: addq %rsi, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rbx ; X64-NEXT: setb %r11b ; X64-NEXT: movq %rdi, %rax @@ -1631,16 +1629,16 @@ ; X64-NEXT: adcq %r9, %r13 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq 16(%rsi), %r8 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rbp @@ -1652,7 +1650,7 @@ ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %bpl ; X64-NEXT: movq %rcx, %rax @@ -1665,31 +1663,31 @@ ; X64-NEXT: movq %r8, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rax, %r11 ; X64-NEXT: adcq %rdx, %r14 ; X64-NEXT: addq %r9, %r11 ; X64-NEXT: adcq %rbx, %r14 ; X64-NEXT: addq %r10, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq %r13, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: adcq $0, %r14 ; X64-NEXT: addq %r15, %r11 -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: setb %r9b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdi @@ -1698,27 +1696,27 @@ ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: addq %rbp, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: addq %r11, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r14, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movzbl %r9b, %eax ; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq 32(%rcx), %rsi ; X64-NEXT: imulq %rsi, %rdi ; X64-NEXT: movq %rsi, %rax @@ -1731,9 +1729,9 @@ ; X64-NEXT: movq 48(%rcx), %rax ; X64-NEXT: movq %rcx, %rbx ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: imulq %rcx, %rdi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rdi, %rdx @@ -1746,7 +1744,7 @@ ; X64-NEXT: movq %rbp, %r10 ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: mulq %rsi @@ -1770,33 +1768,32 @@ ; X64-NEXT: adcq %rax, %r11 ; X64-NEXT: addq %r14, %r9 ; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq 56(%rdx), %rcx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq 56(%rbp), %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 48(%rdx), %rbx -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq 48(%rbp), %rbx ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: imulq %r15, %rbx ; X64-NEXT: addq %rdx, %rbx ; X64-NEXT: movq 32(%rbp), %rdi ; X64-NEXT: movq 40(%rbp), %r8 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: imulq %r8, %rcx ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %rsi, %r14 ; X64-NEXT: adcq %rbx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %r12 @@ -1820,23 +1817,23 @@ ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r14, %rax -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: adcq %r13, %rdi ; X64-NEXT: adcq %r9, %rax ; X64-NEXT: adcq %r11, %rdx -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload ; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, (%rcx) -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, 8(%rcx) -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, 16(%rcx) -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, 24(%rcx) ; X64-NEXT: movq %rsi, 32(%rcx) ; X64-NEXT: movq %rdi, 40(%rcx) Index: test/CodeGen/X86/mul128.ll =================================================================== --- test/CodeGen/X86/mul128.ll +++ test/CodeGen/X86/mul128.ll @@ -6,8 +6,8 @@ ; X64-LABEL: foo: ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: mulq %rdx ; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %r8, %rsi @@ -51,7 +51,7 @@ ; X86-NEXT: imull %ebp, %edi ; X86-NEXT: addl %edx, %edi ; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %ecx, %edi ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -76,7 +76,7 @@ ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %edi, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%esp), %esi # 4-byte Reload Index: test/CodeGen/X86/mul64.ll =================================================================== --- test/CodeGen/X86/mul64.ll +++ test/CodeGen/X86/mul64.ll @@ -19,8 +19,8 @@ ; ; X64-LABEL: foo: ; X64: # %bb.0: -; X64-NEXT: imulq %rsi, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: imulq %rsi, %rax ; X64-NEXT: retq %k = mul i64 %t, %u ret i64 %k Index: test/CodeGen/X86/mwaitx-schedule.ll =================================================================== --- test/CodeGen/X86/mwaitx-schedule.ll +++ test/CodeGen/X86/mwaitx-schedule.ll @@ -6,22 +6,22 @@ define void @foo(i8* %P, i32 %E, i32 %H) nounwind { ; GENERIC-LABEL: foo: ; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: monitorx # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BDVER4-LABEL: foo: ; BDVER4: # %bb.0: -; BDVER4-NEXT: leaq (%rdi), %rax ; BDVER4-NEXT: movl %esi, %ecx +; BDVER4-NEXT: leaq (%rdi), %rax ; BDVER4-NEXT: monitorx ; BDVER4-NEXT: retq ; ; ZNVER1-LABEL: foo: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: monitorx # sched: [100:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.monitorx(i8* %P, i32 %E, i32 %H) @@ -33,9 +33,9 @@ ; GENERIC-LABEL: bar: ; GENERIC: # %bb.0: ; GENERIC-NEXT: pushq %rbx # sched: [5:1.00] -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] ; GENERIC-NEXT: movl %edx, %ebx # sched: [1:0.33] +; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: mwaitx # sched: [100:0.33] ; GENERIC-NEXT: popq %rbx # sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -43,9 +43,9 @@ ; BDVER4-LABEL: bar: ; BDVER4: # %bb.0: ; BDVER4-NEXT: pushq %rbx -; BDVER4-NEXT: movl %edi, %ecx -; BDVER4-NEXT: movl %esi, %eax ; BDVER4-NEXT: movl %edx, %ebx +; BDVER4-NEXT: movl %esi, %eax +; BDVER4-NEXT: movl %edi, %ecx ; BDVER4-NEXT: mwaitx ; BDVER4-NEXT: popq %rbx ; BDVER4-NEXT: retq @@ -53,9 +53,9 @@ ; ZNVER1-LABEL: bar: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: pushq %rbx # sched: [1:0.50] -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] ; ZNVER1-NEXT: movl %edx, %ebx # sched: [1:0.25] +; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: mwaitx # sched: [100:0.25] ; ZNVER1-NEXT: popq %rbx # sched: [8:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] Index: test/CodeGen/X86/mwaitx.ll =================================================================== --- test/CodeGen/X86/mwaitx.ll +++ test/CodeGen/X86/mwaitx.ll @@ -4,8 +4,9 @@ ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=bdver4 | FileCheck %s -check-prefix=WIN64 ; CHECK-LABEL: foo: -; CHECK: leaq (%rdi), %rax -; CHECK-NEXT: movl %esi, %ecx +; CHECK-LABEL: # %bb.0: +; CHECK-DAG: leaq (%rdi), %rax +; CHECK-DAG: movl %esi, %ecx ; CHECK-NEXT: monitorx ; WIN64-LABEL: foo: ; WIN64: leaq (%rcx), %rax @@ -21,13 +22,15 @@ declare void @llvm.x86.monitorx(i8*, i32, i32) nounwind ; CHECK-LABEL: bar: -; CHECK: movl %edi, %ecx -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl %edx, %ebx +; CHECK: pushq +; CHECK-DAG: movl %edi, %ecx +; CHECK-DAG: movl %esi, %eax +; CHECK-DAG: movl %edx, %ebx ; CHECK-NEXT: mwaitx ; WIN64-LABEL: bar: -; WIN64: movl %edx, %eax -; WIN64: movl %r8d, %ebx +; WIN64: pushq +; WIN64-DAG: movl %edx, %eax +; WIN64-DAG: movl %r8d, %ebx ; WIN64-NEXT: mwaitx define void @bar(i32 %E, i32 %H, i32 %C) nounwind { entry: Index: test/CodeGen/X86/negate-i1.ll =================================================================== --- test/CodeGen/X86/negate-i1.ll +++ test/CodeGen/X86/negate-i1.ll @@ -5,9 +5,10 @@ define i8 @select_i8_neg1_or_0(i1 %a) { ; X64-LABEL: select_i8_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: andb $1, %dil -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $1, %al +; X64-NEXT: negb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0: @@ -23,8 +24,9 @@ define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i8_neg1_or_0_zeroext: ; X64: # %bb.0: -; X64-NEXT: negb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i8_neg1_or_0_zeroext: @@ -39,9 +41,10 @@ define i16 @select_i16_neg1_or_0(i1 %a) { ; X64-LABEL: select_i16_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0: @@ -58,8 +61,9 @@ define i16 @select_i16_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i16_neg1_or_0_zeroext: ; X64: # %bb.0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; ; X32-LABEL: select_i16_neg1_or_0_zeroext: @@ -75,9 +79,9 @@ define i32 @select_i32_neg1_or_0(i1 %a) { ; X64-LABEL: select_i32_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: andl $1, %edi -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0: @@ -93,8 +97,8 @@ define i32 @select_i32_neg1_or_0_zeroext(i1 zeroext %a) { ; X64-LABEL: select_i32_neg1_or_0_zeroext: ; X64: # %bb.0: -; X64-NEXT: negl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax ; X64-NEXT: retq ; ; X32-LABEL: select_i32_neg1_or_0_zeroext: @@ -109,10 +113,9 @@ define i64 @select_i64_neg1_or_0(i1 %a) { ; X64-LABEL: select_i64_neg1_or_0: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andl $1, %edi -; X64-NEXT: negq %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negq %rax ; X64-NEXT: retq ; ; X32-LABEL: select_i64_neg1_or_0: Index: test/CodeGen/X86/negate-shift.ll =================================================================== --- test/CodeGen/X86/negate-shift.ll +++ test/CodeGen/X86/negate-shift.ll @@ -4,8 +4,8 @@ define i32 @neg_lshr_signbit(i32 %x) { ; X64-LABEL: neg_lshr_signbit: ; X64: # %bb.0: -; X64-NEXT: sarl $31, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarl $31, %eax ; X64-NEXT: retq %sh = lshr i32 %x, 31 %neg = sub i32 0, %sh @@ -15,8 +15,8 @@ define i64 @neg_ashr_signbit(i64 %x) { ; X64-LABEL: neg_ashr_signbit: ; X64: # %bb.0: -; X64-NEXT: shrq $63, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $63, %rax ; X64-NEXT: retq %sh = ashr i64 %x, 63 %neg = sub i64 0, %sh Index: test/CodeGen/X86/negate.ll =================================================================== --- test/CodeGen/X86/negate.ll +++ test/CodeGen/X86/negate.ll @@ -42,8 +42,9 @@ define i8 @negate_zero_or_minsigned(i8 %x) { ; CHECK-LABEL: negate_zero_or_minsigned: ; CHECK: # %bb.0: -; CHECK-NEXT: shlb $7, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $7, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %signbit = shl i8 %x, 7 %neg = sub i8 0, %signbit Index: test/CodeGen/X86/no-sse2-avg.ll =================================================================== --- test/CodeGen/X86/no-sse2-avg.ll +++ test/CodeGen/X86/no-sse2-avg.ll @@ -5,9 +5,9 @@ define <16 x i8> @PR27973() { ; CHECK-LABEL: PR27973: ; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movq $0, 8(%rdi) ; CHECK-NEXT: movq $0, (%rdi) -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq %t0 = zext <16 x i8> zeroinitializer to <16 x i32> %t1 = add nuw nsw <16 x i32> %t0, Index: test/CodeGen/X86/not-and-simplify.ll =================================================================== --- test/CodeGen/X86/not-and-simplify.ll +++ test/CodeGen/X86/not-and-simplify.ll @@ -7,9 +7,9 @@ define i32 @shrink_xor_constant1(i32 %x) { ; ALL-LABEL: shrink_xor_constant1: ; ALL: # %bb.0: -; ALL-NEXT: shrl $31, %edi -; ALL-NEXT: xorl $1, %edi ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: shrl $31, %eax +; ALL-NEXT: xorl $1, %eax ; ALL-NEXT: retq %sh = lshr i32 %x, 31 %not = xor i32 %sh, -1 @@ -34,9 +34,10 @@ define i8 @shrink_xor_constant2(i8 %x) { ; ALL-LABEL: shrink_xor_constant2: ; ALL: # %bb.0: -; ALL-NEXT: shlb $5, %dil -; ALL-NEXT: xorb $-32, %dil ; ALL-NEXT: movl %edi, %eax +; ALL-NEXT: shlb $5, %al +; ALL-NEXT: xorb $-32, %al +; ALL-NEXT: # kill: def $al killed $al killed $eax ; ALL-NEXT: retq %sh = shl i8 %x, 5 %not = xor i8 %sh, -1 Index: test/CodeGen/X86/palignr.ll =================================================================== --- test/CodeGen/X86/palignr.ll +++ test/CodeGen/X86/palignr.ll @@ -167,16 +167,15 @@ ; CHECK-SSE2-LABEL: test9: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 -; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero -; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; CHECK-SSE2-NEXT: por %xmm0, %xmm1 -; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero +; CHECK-SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] +; CHECK-SSE2-NEXT: por %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retl ; ; CHECK-SSSE3-LABEL: test9: ; CHECK-SSSE3: # %bb.0: -; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] ; CHECK-SSSE3-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] ; CHECK-SSSE3-NEXT: retl ; ; CHECK-AVX-LABEL: test9: Index: test/CodeGen/X86/peep-setb.ll =================================================================== --- test/CodeGen/X86/peep-setb.ll +++ test/CodeGen/X86/peep-setb.ll @@ -7,9 +7,10 @@ define i8 @test1(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -20,9 +21,9 @@ define i32 @test2(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -33,9 +34,9 @@ define i64 @test3(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -46,9 +47,10 @@ define i8 @test4(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: sbbb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: sbbb $0, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = zext i1 %cmp to i8 @@ -59,9 +61,9 @@ define i32 @test5(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: sbbl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = zext i1 %cmp to i32 @@ -72,9 +74,9 @@ define i64 @test6(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: sbbq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: sbbq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = zext i1 %cmp to i64 @@ -85,9 +87,10 @@ define i8 @test7(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: adcb $0, %sil ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cmp = icmp ult i8 %a, %b %cond = sext i1 %cmp to i8 @@ -98,9 +101,9 @@ define i32 @test8(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: adcl $0, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: adcl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %a, %b %cond = sext i1 %cmp to i32 @@ -111,9 +114,9 @@ define i64 @test9(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: retq %cmp = icmp ult i64 %a, %b %conv = sext i1 %cmp to i64 Index: test/CodeGen/X86/pku.ll =================================================================== --- test/CodeGen/X86/pku.ll +++ test/CodeGen/X86/pku.ll @@ -16,9 +16,9 @@ ; ; X64-LABEL: test_x86_wrpkru: ; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9] ; X64-NEXT: xorl %edx, %edx ## encoding: [0x31,0xd2] -; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: wrpkru ## encoding: [0x0f,0x01,0xef] ; X64-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.wrpkru(i32 %src) Index: test/CodeGen/X86/pmulh.ll =================================================================== --- test/CodeGen/X86/pmulh.ll +++ test/CodeGen/X86/pmulh.ll @@ -228,6 +228,7 @@ define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) { ; SSE-LABEL: mulhuw_v64i16: ; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm0 ; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm2 @@ -244,7 +245,6 @@ ; SSE-NEXT: movdqa %xmm2, 32(%rdi) ; SSE-NEXT: movdqa %xmm1, 16(%rdi) ; SSE-NEXT: movdqa %xmm0, (%rdi) -; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: retq ; ; AVX2-LABEL: mulhuw_v64i16: @@ -279,6 +279,7 @@ define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) { ; SSE-LABEL: mulhw_v64i16: ; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0 ; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm2 @@ -295,7 +296,6 @@ ; SSE-NEXT: movdqa %xmm2, 32(%rdi) ; SSE-NEXT: movdqa %xmm1, 16(%rdi) ; SSE-NEXT: movdqa %xmm0, (%rdi) -; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: retq ; ; AVX2-LABEL: mulhw_v64i16: Index: test/CodeGen/X86/pr12360.ll =================================================================== --- test/CodeGen/X86/pr12360.ll +++ test/CodeGen/X86/pr12360.ll @@ -32,8 +32,9 @@ define zeroext i1 @f3(i1 %x) { ; CHECK-LABEL: f3: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: andb $1, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq entry: Index: test/CodeGen/X86/pr15705.ll =================================================================== --- test/CodeGen/X86/pr15705.ll +++ test/CodeGen/X86/pr15705.ll @@ -22,14 +22,14 @@ ; ; X64-LABEL: PR15705: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edx, %eax ; X64-NEXT: cmpl %esi, %edi ; X64-NEXT: je .LBB0_2 ; X64-NEXT: # %bb.1: # %if.end -; X64-NEXT: cmpl %edx, %edi +; X64-NEXT: cmpl %eax, %edi ; X64-NEXT: cmovel %ecx, %esi -; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %esi, %eax ; X64-NEXT: .LBB0_2: # %return -; X64-NEXT: movl %edx, %eax ; X64-NEXT: retq entry: %cmp = icmp eq i32 %x, %a Index: test/CodeGen/X86/pr15981.ll =================================================================== --- test/CodeGen/X86/pr15981.ll +++ test/CodeGen/X86/pr15981.ll @@ -19,9 +19,9 @@ ; ; X64-LABEL: fn1: ; X64: # %bb.0: -; X64-NEXT: testl %esi, %esi -; X64-NEXT: cmovel %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: testl %esi, %esi +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq %3 = icmp ne i32 %1, 0 %4 = select i1 %3, i32 %0, i32 0 Index: test/CodeGen/X86/pr23664.ll =================================================================== --- test/CodeGen/X86/pr23664.ll +++ test/CodeGen/X86/pr23664.ll @@ -7,8 +7,9 @@ ret i2 %or ; CHECK-LABEL: f: -; CHECK: addb %dil, %dil -; CHECK-NEXT: orb $1, %dil -; CHECK-NEXT: movl %edi, %eax +; CHECK: movl %edi, %eax +; CHECK-NEXT: addb %al, %al +; CHECK-NEXT: orb $1, %al +; CHECK-NEXT: # kill ; CHECK-NEXT: retq } Index: test/CodeGen/X86/pr28173.ll =================================================================== --- test/CodeGen/X86/pr28173.ll +++ test/CodeGen/X86/pr28173.ll @@ -78,8 +78,9 @@ define i8 @foo8(i1 zeroext %i) #0 { ; CHECK-LABEL: foo8: ; CHECK: # %bb.0: -; CHECK-NEXT: orb $-2, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $-2, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq br label %bb Index: test/CodeGen/X86/pr34653.ll =================================================================== --- test/CodeGen/X86/pr34653.ll +++ test/CodeGen/X86/pr34653.ll @@ -33,170 +33,170 @@ ; CHECK-NEXT: vmovaps %xmm13, %xmm14 ; CHECK-NEXT: vmovaps %xmm10, %xmm15 ; CHECK-NEXT: vmovaps %xmm15, %xmm2 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vextractf32x4 $3, %zmm9, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vextractf32x4 $2, %zmm9, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovaps %xmm9, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vextractf32x4 $3, %zmm8, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vextractf32x4 $2, %zmm8, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovaps %xmm8, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vextractf32x4 $3, %zmm7, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vextractf32x4 $2, %zmm7, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovaps %xmm7, %xmm0 -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} xmm11 = xmm11[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} xmm13 = xmm13[1,0] ; CHECK-NEXT: # kill: def $ymm10 killed $ymm10 killed $zmm10 ; CHECK-NEXT: vextractf128 $1, %ymm10, %xmm10 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovaps %xmm10, %xmm0 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm15 = xmm15[1,0] -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; CHECK-NEXT: # kill: def $ymm9 killed $ymm9 killed $zmm9 ; CHECK-NEXT: vextractf128 $1, %ymm9, %xmm9 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovaps %xmm9, %xmm0 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; CHECK-NEXT: # kill: def $ymm8 killed $ymm8 killed $zmm8 ; CHECK-NEXT: vextractf128 $1, %ymm8, %xmm8 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovaps %xmm8, %xmm0 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; CHECK-NEXT: # kill: def $ymm7 killed $ymm7 killed $zmm7 ; CHECK-NEXT: vextractf128 $1, %ymm7, %xmm7 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovaps %xmm7, %xmm0 -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} xmm8 = xmm8[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} xmm7 = xmm7[1,0] -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm8, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm13, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm1, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm14, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm2, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm4, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm9, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm10, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm15, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm11, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm3, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm6, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm5, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm12, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-NEXT: vmovsd %xmm7, {{[0-9]+}}(%rsp) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 Index: test/CodeGen/X86/pr34657.ll =================================================================== --- test/CodeGen/X86/pr34657.ll +++ test/CodeGen/X86/pr34657.ll @@ -1,9 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s +; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s define <112 x i8> @pr34657() local_unnamed_addr { -; CHECK-LABEL: pr34657 +; CHECK-LABEL: pr34657: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: vmovups (%rax), %xmm0 ; CHECK-NEXT: vmovups (%rax), %ymm1 ; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 @@ -11,7 +12,6 @@ ; CHECK-NEXT: vmovaps %ymm1, 64(%rdi) ; CHECK-NEXT: vmovaps %zmm2, (%rdi) ; CHECK-NEXT: vextractf32x4 $2, %zmm0, 96(%rdi) -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: Index: test/CodeGen/X86/promote-i16.ll =================================================================== --- test/CodeGen/X86/promote-i16.ll +++ test/CodeGen/X86/promote-i16.ll @@ -12,8 +12,9 @@ ; ; X64-LABEL: foo: ; X64: # %bb.0: # %entry -; X64-NEXT: xorl $21998, %edi # imm = 0x55EE ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $21998, %eax # imm = 0x55EE +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: %0 = xor i16 %x, 21998 @@ -30,8 +31,9 @@ ; ; X64-LABEL: bar: ; X64: # %bb.0: # %entry -; X64-NEXT: xorl $54766, %edi # imm = 0xD5EE ; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl $54766, %eax # imm = 0xD5EE +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: %0 = xor i16 %x, 54766 Index: test/CodeGen/X86/ptest.ll =================================================================== --- test/CodeGen/X86/ptest.ll +++ test/CodeGen/X86/ptest.ll @@ -233,16 +233,16 @@ define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel128: ; SSE41: # %bb.0: -; SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, %edi ; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: ptest %xmm0, %xmm0 +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel128: ; AVX: # %bb.0: -; AVX-NEXT: vptest %xmm0, %xmm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %xmm0, %xmm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: retq %t0 = bitcast <4 x i32> %input to i128 %t1 = icmp ne i128 %t0, 0 @@ -253,17 +253,17 @@ define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel256: ; SSE41: # %bb.0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: ptest %xmm0, %xmm0 -; SSE41-NEXT: cmovel %esi, %edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: vecsel256: ; AVX: # %bb.0: -; AVX-NEXT: vptest %ymm0, %ymm0 -; AVX-NEXT: cmovel %esi, %edi ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: vptest %ymm0, %ymm0 +; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq %t0 = bitcast <8 x i32> %input to i256 @@ -275,45 +275,45 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; SSE41-LABEL: vecsel512: ; SSE41: # %bb.0: +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: por %xmm3, %xmm1 ; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: ptest %xmm1, %xmm1 -; SSE41-NEXT: cmovel %esi, %edi -; SSE41-NEXT: movl %edi, %eax +; SSE41-NEXT: cmovel %esi, %eax ; SSE41-NEXT: retq ; ; AVX1-LABEL: vecsel512: ; AVX1: # %bb.0: +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vptest %ymm0, %ymm0 -; AVX1-NEXT: cmovel %esi, %edi -; AVX1-NEXT: movl %edi, %eax +; AVX1-NEXT: cmovel %esi, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX512-LABEL: vecsel512: ; AVX512: # %bb.0: +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; AVX512-NEXT: vmovq %xmm1, %rax +; AVX512-NEXT: vmovq %xmm1, %rcx ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512-NEXT: vmovq %xmm2, %rcx -; AVX512-NEXT: orq %rax, %rcx +; AVX512-NEXT: vmovq %xmm2, %rdx +; AVX512-NEXT: orq %rcx, %rdx ; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; AVX512-NEXT: vmovq %xmm3, %rax -; AVX512-NEXT: orq %rcx, %rax -; AVX512-NEXT: vmovq %xmm0, %rcx -; AVX512-NEXT: orq %rax, %rcx -; AVX512-NEXT: vpextrq $1, %xmm1, %rax -; AVX512-NEXT: vpextrq $1, %xmm2, %rdx -; AVX512-NEXT: orq %rax, %rdx -; AVX512-NEXT: vpextrq $1, %xmm3, %rax -; AVX512-NEXT: orq %rdx, %rax -; AVX512-NEXT: vpextrq $1, %xmm0, %rdx -; AVX512-NEXT: orq %rax, %rdx +; AVX512-NEXT: vmovq %xmm3, %rcx +; AVX512-NEXT: orq %rdx, %rcx +; AVX512-NEXT: vmovq %xmm0, %rdx ; AVX512-NEXT: orq %rcx, %rdx -; AVX512-NEXT: cmovel %esi, %edi -; AVX512-NEXT: movl %edi, %eax +; AVX512-NEXT: vpextrq $1, %xmm1, %rcx +; AVX512-NEXT: vpextrq $1, %xmm2, %rdi +; AVX512-NEXT: orq %rcx, %rdi +; AVX512-NEXT: vpextrq $1, %xmm3, %rcx +; AVX512-NEXT: orq %rdi, %rcx +; AVX512-NEXT: vpextrq $1, %xmm0, %rdi +; AVX512-NEXT: orq %rcx, %rdi +; AVX512-NEXT: orq %rdx, %rdi +; AVX512-NEXT: cmovel %esi, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %t0 = bitcast <16 x i32> %input to i512 Index: test/CodeGen/X86/rot16.ll =================================================================== --- test/CodeGen/X86/rot16.ll +++ test/CodeGen/X86/rot16.ll @@ -13,8 +13,10 @@ ; X64-LABEL: foo: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldw %cl, %ax, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = shl i16 %x, %z %t1 = sub i16 16, %z @@ -35,8 +37,10 @@ ; X64-LABEL: bar: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldw %cl, %di, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = shl i16 %y, %z %t1 = sub i16 16, %z @@ -56,8 +60,10 @@ ; X64-LABEL: un: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdw %cl, %ax, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %x, %z %t1 = sub i16 16, %z @@ -78,8 +84,10 @@ ; X64-LABEL: bu: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdw %cl, %di, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %y, %z %t1 = sub i16 16, %z @@ -97,8 +105,9 @@ ; ; X64-LABEL: xfoo: ; X64: # %bb.0: -; X64-NEXT: rolw $5, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $5, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %x, 11 %t1 = shl i16 %x, 5 @@ -116,8 +125,9 @@ ; ; X64-LABEL: xbar: ; X64: # %bb.0: -; X64-NEXT: shldw $5, %di, %si ; X64-NEXT: movl %esi, %eax +; X64-NEXT: shldw $5, %di, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = shl i16 %y, 5 %t1 = lshr i16 %x, 11 @@ -134,8 +144,9 @@ ; ; X64-LABEL: xun: ; X64: # %bb.0: -; X64-NEXT: rolw $11, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $11, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %x, 5 %t1 = shl i16 %x, 11 @@ -153,8 +164,9 @@ ; ; X64-LABEL: xbu: ; X64: # %bb.0: -; X64-NEXT: shldw $11, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: shldw $11, %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i16 %y, 5 %t1 = shl i16 %x, 11 Index: test/CodeGen/X86/rot64.ll =================================================================== --- test/CodeGen/X86/rot64.ll +++ test/CodeGen/X86/rot64.ll @@ -6,9 +6,10 @@ define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: foo: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rolq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: rolq %cl, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %x, %z @@ -21,9 +22,10 @@ define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bar: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shldq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: shldq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, %z @@ -36,9 +38,10 @@ define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: un: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: rorq %cl, %rdi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: rorq %cl, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %x, %z @@ -51,9 +54,10 @@ define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: bu: ; ALL: # %bb.0: # %entry -; ALL-NEXT: movl %edx, %ecx -; ALL-NEXT: shrdq %cl, %rdi, %rsi +; ALL-NEXT: movq %rdx, %rcx ; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: # kill: def $cl killed $cl killed $rcx +; ALL-NEXT: shrdq %cl, %rdi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, %z @@ -66,14 +70,14 @@ define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xfoo: ; X64: # %bb.0: # %entry -; X64-NEXT: rolq $7, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $7, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xfoo: ; SHLD: # %bb.0: # %entry -; SHLD-NEXT: shldq $7, %rdi, %rdi ; SHLD-NEXT: movq %rdi, %rax +; SHLD-NEXT: shldq $7, %rdi, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xfoo: @@ -115,8 +119,8 @@ define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: xbar: ; ALL: # %bb.0: # %entry -; ALL-NEXT: shrdq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shrdq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = shl i64 %y, 7 @@ -128,14 +132,14 @@ define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { ; X64-LABEL: xun: ; X64: # %bb.0: # %entry -; X64-NEXT: rolq $57, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $57, %rax ; X64-NEXT: retq ; ; SHLD-LABEL: xun: ; SHLD: # %bb.0: # %entry -; SHLD-NEXT: shldq $57, %rdi, %rdi ; SHLD-NEXT: movq %rdi, %rax +; SHLD-NEXT: shldq $57, %rdi, %rax ; SHLD-NEXT: retq ; ; BMI2-LABEL: xun: @@ -177,8 +181,8 @@ define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone { ; ALL-LABEL: xbu: ; ALL: # %bb.0: # %entry -; ALL-NEXT: shldq $57, %rsi, %rdi ; ALL-NEXT: movq %rdi, %rax +; ALL-NEXT: shldq $57, %rsi, %rax ; ALL-NEXT: retq entry: %0 = lshr i64 %y, 7 Index: test/CodeGen/X86/rotate.ll =================================================================== --- test/CodeGen/X86/rotate.ll +++ test/CodeGen/X86/rotate.ll @@ -43,8 +43,9 @@ ; X64-LABEL: rotl64: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rolq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rolq %cl, %rax ; X64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i64 %B = shl i64 %A, %shift.upgrd.1 @@ -96,8 +97,9 @@ ; X64-LABEL: rotr64: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorq %cl, %rax ; X64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i64 %B = lshr i64 %A, %shift.upgrd.3 @@ -120,8 +122,8 @@ ; ; X64-LABEL: rotli64: ; X64: # %bb.0: -; X64-NEXT: rolq $5, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $5, %rax ; X64-NEXT: retq %B = shl i64 %A, 5 %C = lshr i64 %A, 59 @@ -141,8 +143,8 @@ ; ; X64-LABEL: rotri64: ; X64: # %bb.0: -; X64-NEXT: rolq $59, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $59, %rax ; X64-NEXT: retq %B = lshr i64 %A, 5 %C = shl i64 %A, 59 @@ -162,8 +164,8 @@ ; ; X64-LABEL: rotl1_64: ; X64: # %bb.0: -; X64-NEXT: rolq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq %rax ; X64-NEXT: retq %B = shl i64 %A, 1 %C = lshr i64 %A, 63 @@ -183,8 +185,8 @@ ; ; X64-LABEL: rotr1_64: ; X64: # %bb.0: -; X64-NEXT: rorq %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rorq %rax ; X64-NEXT: retq %B = shl i64 %A, 63 %C = lshr i64 %A, 1 @@ -203,8 +205,9 @@ ; X64-LABEL: rotl32: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: roll %cl, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %eax ; X64-NEXT: retq %shift.upgrd.1 = zext i8 %Amt to i32 %B = shl i32 %A, %shift.upgrd.1 @@ -226,8 +229,9 @@ ; X64-LABEL: rotr32: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorl %cl, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorl %cl, %eax ; X64-NEXT: retq %shift.upgrd.3 = zext i8 %Amt to i32 %B = lshr i32 %A, %shift.upgrd.3 @@ -247,8 +251,8 @@ ; ; X64-LABEL: rotli32: ; X64: # %bb.0: -; X64-NEXT: roll $5, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: roll $5, %eax ; X64-NEXT: retq %B = shl i32 %A, 5 %C = lshr i32 %A, 27 @@ -265,8 +269,8 @@ ; ; X64-LABEL: rotri32: ; X64: # %bb.0: -; X64-NEXT: roll $27, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: roll $27, %eax ; X64-NEXT: retq %B = lshr i32 %A, 5 %C = shl i32 %A, 27 @@ -283,8 +287,8 @@ ; ; X64-LABEL: rotl1_32: ; X64: # %bb.0: -; X64-NEXT: roll %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: roll %eax ; X64-NEXT: retq %B = shl i32 %A, 1 %C = lshr i32 %A, 31 @@ -301,8 +305,8 @@ ; ; X64-LABEL: rotr1_32: ; X64: # %bb.0: -; X64-NEXT: rorl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rorl %eax ; X64-NEXT: retq %B = shl i32 %A, 31 %C = lshr i32 %A, 1 @@ -321,8 +325,10 @@ ; X64-LABEL: rotl16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rolw %cl, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rolw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %shift.upgrd.5 = zext i8 %Amt to i16 %B = shl i16 %A, %shift.upgrd.5 @@ -344,8 +350,10 @@ ; X64-LABEL: rotr16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorw %cl, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %shift.upgrd.7 = zext i8 %Amt to i16 %B = lshr i16 %A, %shift.upgrd.7 @@ -365,8 +373,9 @@ ; ; X64-LABEL: rotli16: ; X64: # %bb.0: -; X64-NEXT: rolw $5, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $5, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %B = shl i16 %A, 5 %C = lshr i16 %A, 11 @@ -383,8 +392,9 @@ ; ; X64-LABEL: rotri16: ; X64: # %bb.0: -; X64-NEXT: rolw $11, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw $11, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %B = lshr i16 %A, 5 %C = shl i16 %A, 11 @@ -401,8 +411,9 @@ ; ; X64-LABEL: rotl1_16: ; X64: # %bb.0: -; X64-NEXT: rolw %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolw %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %B = shl i16 %A, 1 %C = lshr i16 %A, 15 @@ -419,8 +430,9 @@ ; ; X64-LABEL: rotr1_16: ; X64: # %bb.0: -; X64-NEXT: rorw %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rorw %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %B = lshr i16 %A, 1 %C = shl i16 %A, 15 @@ -439,8 +451,10 @@ ; X64-LABEL: rotl8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rolb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rolb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %B = shl i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -460,8 +474,10 @@ ; X64-LABEL: rotr8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %B = lshr i8 %A, %Amt %Amt2 = sub i8 8, %Amt @@ -479,8 +495,9 @@ ; ; X64-LABEL: rotli8: ; X64: # %bb.0: -; X64-NEXT: rolb $5, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $5, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %B = shl i8 %A, 5 %C = lshr i8 %A, 3 @@ -497,8 +514,9 @@ ; ; X64-LABEL: rotri8: ; X64: # %bb.0: -; X64-NEXT: rolb $3, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb $3, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %B = lshr i8 %A, 5 %C = shl i8 %A, 3 @@ -515,8 +533,9 @@ ; ; X64-LABEL: rotl1_8: ; X64: # %bb.0: -; X64-NEXT: rolb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rolb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %B = shl i8 %A, 1 %C = lshr i8 %A, 7 @@ -533,8 +552,9 @@ ; ; X64-LABEL: rotr1_8: ; X64: # %bb.0: -; X64-NEXT: rorb %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: rorb %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %B = lshr i8 %A, 1 %C = shl i8 %A, 7 @@ -665,6 +685,7 @@ ; X64-LABEL: truncated_rot: ; X64: # %bb.0: # %entry ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolq %cl, %rdi ; X64-NEXT: movl %edi, %eax ; X64-NEXT: retq Index: test/CodeGen/X86/rotate2.ll =================================================================== --- test/CodeGen/X86/rotate2.ll +++ test/CodeGen/X86/rotate2.ll @@ -14,8 +14,8 @@ ; ; X64-LABEL: test1: ; X64: # %bb.0: # %entry -; X64-NEXT: rolq $9, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: rolq $9, %rax ; X64-NEXT: retq entry: %tmp2 = lshr i64 %x, 55 ; [#uses=1] @@ -34,9 +34,8 @@ ; ; X64-LABEL: test2: ; X64: # %bb.0: # %entry -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: roll $10, %edi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: roll $10, %eax ; X64-NEXT: retq entry: %tmp2 = lshr i32 %x, 22 ; [#uses=1] Index: test/CodeGen/X86/rotate4.ll =================================================================== --- test/CodeGen/X86/rotate4.ll +++ test/CodeGen/X86/rotate4.ll @@ -16,8 +16,9 @@ ; X64-LABEL: rotate_left_32: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: roll %cl, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %eax ; X64-NEXT: retq %and = and i32 %b, 31 %shl = shl i32 %a, %and @@ -39,8 +40,9 @@ ; X64-LABEL: rotate_right_32: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorl %cl, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorl %cl, %eax ; X64-NEXT: retq %and = and i32 %b, 31 %shl = lshr i32 %a, %and @@ -98,9 +100,10 @@ ; ; X64-LABEL: rotate_left_64: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rolq %cl, %rdi +; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: rolq %cl, %rax ; X64-NEXT: retq %and = and i64 %b, 63 %shl = shl i64 %a, %and @@ -158,9 +161,10 @@ ; ; X64-LABEL: rotate_right_64: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorq %cl, %rdi +; X64-NEXT: movq %rsi, %rcx ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: rorq %cl, %rax ; X64-NEXT: retq %and = and i64 %b, 63 %shl = lshr i64 %a, %and @@ -184,6 +188,7 @@ ; X64-LABEL: rotate_left_m32: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: roll %cl, (%rdi) ; X64-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -208,6 +213,7 @@ ; X64-LABEL: rotate_right_m32: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rorl %cl, (%rdi) ; X64-NEXT: retq %a = load i32, i32* %pa, align 16 @@ -276,7 +282,8 @@ ; ; X64-LABEL: rotate_left_m64: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NEXT: rolq %cl, (%rdi) ; X64-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -345,7 +352,8 @@ ; ; X64-LABEL: rotate_right_m64: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NEXT: rorq %cl, (%rdi) ; X64-NEXT: retq %a = load i64, i64* %pa, align 16 @@ -373,8 +381,10 @@ ; X64-LABEL: rotate_left_8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rolb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rolb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -397,8 +407,10 @@ ; X64-LABEL: rotate_right_8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorb %cl, %dil ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %amt = trunc i32 %amount to i8 %sub = sub i8 0, %amt @@ -421,8 +433,10 @@ ; X64-LABEL: rotate_left_16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rolw %cl, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rolw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -445,8 +459,10 @@ ; X64-LABEL: rotate_right_16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: rorw %cl, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorw %cl, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %amt = trunc i32 %amount to i16 %sub = sub i16 0, %amt @@ -469,6 +485,7 @@ ; X64-LABEL: rotate_left_m8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolb %cl, (%rdi) ; X64-NEXT: retq %x = load i8, i8* %p, align 1 @@ -494,6 +511,7 @@ ; X64-LABEL: rotate_right_m8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rorb %cl, (%rdi) ; X64-NEXT: retq %x = load i8, i8* %p, align 1 @@ -519,6 +537,7 @@ ; X64-LABEL: rotate_left_m16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolw %cl, (%rdi) ; X64-NEXT: retq %x = load i16, i16* %p, align 1 @@ -544,6 +563,7 @@ ; X64-LABEL: rotate_right_m16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rorw %cl, (%rdi) ; X64-NEXT: retq %x = load i16, i16* %p, align 1 @@ -569,10 +589,11 @@ ; ; X64-LABEL: rotate_demanded_bits: ; X64: # %bb.0: -; X64-NEXT: andb $30, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: roll %cl, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $30, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %eax ; X64-NEXT: retq %3 = and i32 %1, 30 %4 = shl i32 %0, %3 @@ -594,10 +615,11 @@ ; ; X64-LABEL: rotate_demanded_bits_2: ; X64: # %bb.0: -; X64-NEXT: andb $23, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: roll %cl, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $23, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %eax ; X64-NEXT: retq %3 = and i32 %1, 23 %4 = shl i32 %0, %3 @@ -620,11 +642,12 @@ ; ; X64-LABEL: rotate_demanded_bits_3: ; X64: # %bb.0: -; X64-NEXT: addb %sil, %sil -; X64-NEXT: andb $30, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: roll %cl, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: addb %cl, %cl +; X64-NEXT: andb $30, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %eax ; X64-NEXT: retq %3 = shl i32 %1, 1 %4 = and i32 %3, 30 Index: test/CodeGen/X86/sar_fold64.ll =================================================================== --- test/CodeGen/X86/sar_fold64.ll +++ test/CodeGen/X86/sar_fold64.ll @@ -56,9 +56,10 @@ define i8 @all_sign_bit_ashr(i8 %x) { ; CHECK-LABEL: all_sign_bit_ashr: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: negb %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %and = and i8 %x, 1 %neg = sub i8 0, %and Index: test/CodeGen/X86/scalar_widen_div.ll =================================================================== --- test/CodeGen/X86/scalar_widen_div.ll +++ test/CodeGen/X86/scalar_widen_div.ll @@ -57,20 +57,21 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { ; CHECK-LABEL: test_char_div: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %edx, %r10d ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %cl ; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %r8b -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %r10d, %eax ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %r9b ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: retq %div.r = sdiv <3 x i8> %num, %div ret <3 x i8> %div.r @@ -233,8 +234,8 @@ ; CHECK-LABEL: test_ulong_div: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %r10 -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq %rcx ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: xorl %edx, %edx Index: test/CodeGen/X86/schedule-x86-64-shld.ll =================================================================== --- test/CodeGen/X86/schedule-x86-64-shld.ll +++ test/CodeGen/X86/schedule-x86-64-shld.ll @@ -12,20 +12,20 @@ define i64 @lshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize { ; GENERIC-LABEL: lshift10_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shldq $10, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift10_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: shldq $10, %rsi, %rdi # sched: [3:3.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift10_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: shldq $10, %rsi, %rdi ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: shldq $10, %rsi, %rax ; BDVER1-NEXT: retq entry: %shl = shl i64 %a, 10 @@ -37,8 +37,8 @@ define i64 @lshift10(i64 %a, i64 %b) nounwind readnone { ; GENERIC-LABEL: lshift10: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shldq $10, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift10: @@ -70,20 +70,20 @@ define i64 @rshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize { ; GENERIC-LABEL: rshift10_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shrdq $62, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift10_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: shrdq $62, %rsi, %rdi # sched: [3:3.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: rshift10_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: shrdq $62, %rsi, %rdi ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: shrdq $62, %rsi, %rax ; BDVER1-NEXT: retq entry: %shl = lshr i64 %a, 62 @@ -96,8 +96,8 @@ define i64 @rshift10(i64 %a, i64 %b) nounwind readnone { ; GENERIC-LABEL: rshift10: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shrdq $62, %rsi, %rdi # sched: [2:0.67] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift10: @@ -126,23 +126,26 @@ define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize { ; GENERIC-LABEL: lshift_cl_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_cl_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:4.00] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx +; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_cl_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shldq %cl, %rsi, %rdi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx +; BDVER1-NEXT: shldq %cl, %rsi, %rax ; BDVER1-NEXT: retq entry: %shl = shl i64 %a, %c @@ -155,33 +158,34 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone { ; GENERIC-LABEL: lshift_cl: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx -; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] +; BTVER2-NEXT: movl $64, %edx # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] +; BTVER2-NEXT: subl %ecx, %edx # sched: [1:0.50] +; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] +; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50] +; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shlq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx -; BDVER1-NEXT: shrq %cl, %rsi -; BDVER1-NEXT: orq %rdi, %rsi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rsi, %rax +; BDVER1-NEXT: shlq %cl, %rdi +; BDVER1-NEXT: movl $64, %edx +; BDVER1-NEXT: subl %ecx, %edx +; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: shrq %cl, %rax +; BDVER1-NEXT: orq %rdi, %rax ; BDVER1-NEXT: retq entry: %shl = shl i64 %a, %c @@ -200,23 +204,26 @@ define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize { ; GENERIC-LABEL: rshift_cl_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift_cl_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:4.00] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx +; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: rshift_cl_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shrdq %cl, %rsi, %rdi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rdi, %rax +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx +; BDVER1-NEXT: shrdq %cl, %rsi, %rax ; BDVER1-NEXT: retq entry: %shr = lshr i64 %a, %c @@ -229,33 +236,34 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone { ; GENERIC-LABEL: rshift_cl: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %edx, %ecx # sched: [1:0.33] -; GENERIC-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50] +; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx +; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: rshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx -; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] +; BTVER2-NEXT: movl $64, %edx # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] +; BTVER2-NEXT: subl %ecx, %edx # sched: [1:0.50] +; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] +; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] +; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: rshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx -; BDVER1-NEXT: shrq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx -; BDVER1-NEXT: shlq %cl, %rsi -; BDVER1-NEXT: orq %rdi, %rsi +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: movq %rsi, %rax +; BDVER1-NEXT: shrq %cl, %rdi +; BDVER1-NEXT: movl $64, %edx +; BDVER1-NEXT: subl %ecx, %edx +; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: shlq %cl, %rax +; BDVER1-NEXT: orq %rdi, %rax ; BDVER1-NEXT: retq entry: %shr = lshr i64 %a, %c @@ -275,19 +283,22 @@ define void @lshift_mem_cl_optsize(i64 %a, i64 %c) nounwind readnone optsize { ; GENERIC-LABEL: lshift_mem_cl_optsize: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx ; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_mem_cl_optsize: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_mem_cl_optsize: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %esi, %ecx +; BDVER1-NEXT: movq %rsi, %rcx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shldq %cl, %rdi, {{.*}}(%rip) ; BDVER1-NEXT: retq entry: @@ -303,33 +314,34 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone { ; GENERIC-LABEL: lshift_mem_cl: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33] +; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx ; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_mem_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00] -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: movq {{.*}}(%rip), %rdx # sched: [5:1.00] +; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] +; BTVER2-NEXT: movl $64, %eax # sched: [1:0.50] +; BTVER2-NEXT: subl %ecx, %eax # sched: [1:0.50] +; BTVER2-NEXT: shlq %cl, %rdx # sched: [1:0.50] +; BTVER2-NEXT: movl %eax, %ecx # sched: [1:0.50] ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50] +; BTVER2-NEXT: orq %rdx, %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; BDVER1-LABEL: lshift_mem_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movq {{.*}}(%rip), %rax -; BDVER1-NEXT: movl %esi, %ecx -; BDVER1-NEXT: shlq %cl, %rax -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %esi, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: movq %rsi, %rcx +; BDVER1-NEXT: movq {{.*}}(%rip), %rdx +; BDVER1-NEXT: shlq %cl, %rdx +; BDVER1-NEXT: movl $64, %eax +; BDVER1-NEXT: subl %ecx, %eax +; BDVER1-NEXT: movl %eax, %ecx ; BDVER1-NEXT: shrq %cl, %rdi -; BDVER1-NEXT: orq %rax, %rdi +; BDVER1-NEXT: orq %rdx, %rdi ; BDVER1-NEXT: movq %rdi, {{.*}}(%rip) ; BDVER1-NEXT: retq entry: Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -2541,62 +2541,62 @@ define i32 @test_bswap32(i32 %a0) optsize { ; GENERIC-LABEL: test_bswap32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bswapl %edi # sched: [1:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: bswapl %eax # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_bswap32: ; ATOM: # %bb.0: -; ATOM-NEXT: bswapl %edi # sched: [1:1.00] ; ATOM-NEXT: movl %edi, %eax # sched: [1:0.50] +; ATOM-NEXT: bswapl %eax # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_bswap32: ; SLM: # %bb.0: -; SLM-NEXT: bswapl %edi # sched: [1:0.50] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: bswapl %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_bswap32: ; SANDY: # %bb.0: -; SANDY-NEXT: bswapl %edi # sched: [1:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: bswapl %eax # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bswap32: ; HASWELL: # %bb.0: -; HASWELL-NEXT: bswapl %edi # sched: [1:0.50] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: bswapl %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_bswap32: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bswapl %edi # sched: [1:0.50] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: bswapl %eax # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bswap32: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bswapl %edi # sched: [1:0.50] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: bswapl %eax # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_bswap32: ; SKX: # %bb.0: -; SKX-NEXT: bswapl %edi # sched: [1:0.50] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: bswapl %eax # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bswap32: ; BTVER2: # %bb.0: -; BTVER2-NEXT: bswapl %edi # sched: [1:0.50] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: bswapl %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_bswap32: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bswapl %edi # sched: [1:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: bswapl %eax # sched: [1:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind ret i32 %1 @@ -2604,62 +2604,62 @@ define i64 @test_bswap64(i64 %a0) optsize { ; GENERIC-LABEL: test_bswap64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: bswapq %rdi # sched: [2:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: bswapq %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_bswap64: ; ATOM: # %bb.0: -; ATOM-NEXT: bswapq %rdi # sched: [1:1.00] ; ATOM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; ATOM-NEXT: bswapq %rax # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_bswap64: ; SLM: # %bb.0: -; SLM-NEXT: bswapq %rdi # sched: [1:0.50] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: bswapq %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_bswap64: ; SANDY: # %bb.0: -; SANDY-NEXT: bswapq %rdi # sched: [2:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: bswapq %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bswap64: ; HASWELL: # %bb.0: -; HASWELL-NEXT: bswapq %rdi # sched: [2:0.50] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: bswapq %rax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_bswap64: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bswapq %rdi # sched: [2:0.50] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: bswapq %rax # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bswap64: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bswapq %rdi # sched: [2:0.50] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: bswapq %rax # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_bswap64: ; SKX: # %bb.0: -; SKX-NEXT: bswapq %rdi # sched: [2:0.50] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: bswapq %rax # sched: [2:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bswap64: ; BTVER2: # %bb.0: -; BTVER2-NEXT: bswapq %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: bswapq %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_bswap64: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bswapq %rdi # sched: [1:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: bswapq %rax # sched: [1:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = tail call i64 asm "bswap $0", "=r,0"(i64 %a0) nounwind ret i64 %1 Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -53,6 +53,7 @@ ; GENERIC-NEXT: popq %rcx ; GENERIC-NEXT: retq ; GENERIC-NEXT: LBB1_1: ## %bb90 +; GENERIC-NEXT: ud2 ; ; ATOM-LABEL: test2: ; ATOM: ## %bb.0: ## %entry @@ -70,6 +71,7 @@ ; ATOM-NEXT: popq %rcx ; ATOM-NEXT: retq ; ATOM-NEXT: LBB1_1: ## %bb90 +; ATOM-NEXT: ud2 ; ; MCU-LABEL: test2: ; MCU: # %bb.0: # %entry @@ -642,8 +644,8 @@ define noalias i8* @test12(i64 %count) nounwind ssp noredzone { ; GENERIC-LABEL: test12: ; GENERIC: ## %bb.0: ## %entry -; GENERIC-NEXT: movl $4, %ecx ; GENERIC-NEXT: movq %rdi, %rax +; GENERIC-NEXT: movl $4, %ecx ; GENERIC-NEXT: mulq %rcx ; GENERIC-NEXT: movq $-1, %rdi ; GENERIC-NEXT: cmovnoq %rax, %rdi @@ -845,16 +847,18 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; GENERIC-LABEL: test18: ; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpl $15, %edi -; GENERIC-NEXT: cmovgel %edx, %esi ; GENERIC-NEXT: movl %esi, %eax +; GENERIC-NEXT: cmpl $15, %edi +; GENERIC-NEXT: cmovgel %edx, %eax +; GENERIC-NEXT: ## kill: def $al killed $al killed $eax ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test18: ; ATOM: ## %bb.0: -; ATOM-NEXT: cmpl $15, %edi -; ATOM-NEXT: cmovgel %edx, %esi ; ATOM-NEXT: movl %esi, %eax +; ATOM-NEXT: cmpl $15, %edi +; ATOM-NEXT: cmovgel %edx, %eax +; ATOM-NEXT: ## kill: def $al killed $al killed $eax ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq @@ -876,16 +880,18 @@ define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) { ; CHECK-LABEL: trunc_select_miscompile: ; CHECK: ## %bb.0: -; CHECK-NEXT: orb $2, %sil ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orb $2, %cl +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: trunc_select_miscompile: ; MCU: # %bb.0: -; MCU-NEXT: orb $2, %dl ; MCU-NEXT: movl %edx, %ecx +; MCU-NEXT: orb $2, %cl +; MCU-NEXT: # kill: def $cl killed $cl killed $ecx ; MCU-NEXT: shll %cl, %eax ; MCU-NEXT: retl %tmp1 = select i1 %cc, i32 3, i32 2 @@ -1133,10 +1139,10 @@ define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_xor_2: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: xorl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: xorl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_2: @@ -1159,10 +1165,10 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_xor_2b: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: xorl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: xorl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_2b: @@ -1184,10 +1190,10 @@ define i32 @select_or(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_or: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or: @@ -1210,10 +1216,10 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) { ; CHECK-LABEL: select_or_b: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or_b: @@ -1235,10 +1241,10 @@ define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) { ; CHECK-LABEL: select_or_1: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or_1: @@ -1261,10 +1267,10 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) { ; CHECK-LABEL: select_or_1b: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovel %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: testb $1, %dl +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: retq ; ; MCU-LABEL: select_or_1b: Index: test/CodeGen/X86/select_const.ll =================================================================== --- test/CodeGen/X86/select_const.ll +++ test/CodeGen/X86/select_const.ll @@ -43,8 +43,8 @@ define i32 @select_1_or_0(i1 %cond) { ; CHECK-LABEL: select_1_or_0: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -62,8 +62,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_1_or_0_signext: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -95,8 +95,8 @@ define i32 @select_0_or_neg1_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_neg1_signext: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -107,9 +107,9 @@ define i32 @select_neg1_or_0(i1 %cond) { ; CHECK-LABEL: select_neg1_or_0: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -118,8 +118,8 @@ define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_neg1_or_0_zeroext: ; CHECK: # %bb.0: -; CHECK-NEXT: negl %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -329,9 +329,10 @@ define i8 @select_pow2_diff(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff: ; CHECK: # %bb.0: -; CHECK-NEXT: shlb $4, %dil -; CHECK-NEXT: orb $3, %dil ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shlb $4, %al +; CHECK-NEXT: orb $3, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %sel = select i1 %cond, i8 19, i8 3 ret i8 %sel Index: test/CodeGen/X86/selectcc-to-shiftand.ll =================================================================== --- test/CodeGen/X86/selectcc-to-shiftand.ll +++ test/CodeGen/X86/selectcc-to-shiftand.ll @@ -7,16 +7,16 @@ define i32 @neg_sel_constants(i32 %a) { ; CHECK-NOBMI-LABEL: neg_sel_constants: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: sarl $31, %edi -; CHECK-NOBMI-NEXT: andl $5, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: sarl $31, %eax +; CHECK-NOBMI-NEXT: andl $5, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: neg_sel_constants: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: sarl $31, %edi -; CHECK-BMI-NEXT: andl $5, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: sarl $31, %eax +; CHECK-BMI-NEXT: andl $5, %eax ; CHECK-BMI-NEXT: retq %tmp.1 = icmp slt i32 %a, 0 %retval = select i1 %tmp.1, i32 5, i32 0 @@ -28,16 +28,16 @@ define i32 @neg_sel_special_constant(i32 %a) { ; CHECK-NOBMI-LABEL: neg_sel_special_constant: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: shrl $22, %edi -; CHECK-NOBMI-NEXT: andl $512, %edi # imm = 0x200 ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: shrl $22, %eax +; CHECK-NOBMI-NEXT: andl $512, %eax # imm = 0x200 ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: neg_sel_special_constant: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: shrl $22, %edi -; CHECK-BMI-NEXT: andl $512, %edi # imm = 0x200 ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: shrl $22, %eax +; CHECK-BMI-NEXT: andl $512, %eax # imm = 0x200 ; CHECK-BMI-NEXT: retq %tmp.1 = icmp slt i32 %a, 0 %retval = select i1 %tmp.1, i32 512, i32 0 @@ -49,16 +49,16 @@ define i32 @neg_sel_variable_and_zero(i32 %a, i32 %b) { ; CHECK-NOBMI-LABEL: neg_sel_variable_and_zero: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: sarl $31, %edi -; CHECK-NOBMI-NEXT: andl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: sarl $31, %eax +; CHECK-NOBMI-NEXT: andl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: neg_sel_variable_and_zero: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: sarl $31, %edi -; CHECK-BMI-NEXT: andl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: sarl $31, %eax +; CHECK-BMI-NEXT: andl %esi, %eax ; CHECK-BMI-NEXT: retq %tmp.1 = icmp slt i32 %a, 0 %retval = select i1 %tmp.1, i32 %b, i32 0 @@ -116,18 +116,18 @@ define i32 @pos_sel_special_constant(i32 %a) { ; CHECK-NOBMI-LABEL: pos_sel_special_constant: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: shrl $22, %edi -; CHECK-NOBMI-NEXT: andl $512, %edi # imm = 0x200 ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: shrl $22, %eax +; CHECK-NOBMI-NEXT: andl $512, %eax # imm = 0x200 ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: pos_sel_special_constant: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: notl %edi -; CHECK-BMI-NEXT: shrl $22, %edi -; CHECK-BMI-NEXT: andl $512, %edi # imm = 0x200 ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: notl %eax +; CHECK-BMI-NEXT: shrl $22, %eax +; CHECK-BMI-NEXT: andl $512, %eax # imm = 0x200 ; CHECK-BMI-NEXT: retq %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 512, i32 0 Index: test/CodeGen/X86/setcc-logic.ll =================================================================== --- test/CodeGen/X86/setcc-logic.ll +++ test/CodeGen/X86/setcc-logic.ll @@ -41,9 +41,10 @@ define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_set: ; CHECK: # %bb.0: -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 @@ -66,9 +67,10 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_set: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 Index: test/CodeGen/X86/sext-i1.ll =================================================================== --- test/CodeGen/X86/sext-i1.ll +++ test/CodeGen/X86/sext-i1.ll @@ -164,8 +164,8 @@ ; ; X64-LABEL: select_0_or_1s_signext: ; X64: # %bb.0: -; X64-NEXT: notl %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: notl %eax ; X64-NEXT: retq %not = xor i1 %cond, 1 %sext = sext i1 %not to i32 Index: test/CodeGen/X86/shift-and.ll =================================================================== --- test/CodeGen/X86/shift-and.ll +++ test/CodeGen/X86/shift-and.ll @@ -12,9 +12,10 @@ ; ; X64-LABEL: t1: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %shamt = and i32 %t, 31 %res = shl i32 %val, %shamt @@ -31,9 +32,10 @@ ; ; X64-LABEL: t2: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shll %cl, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %eax ; X64-NEXT: retq %shamt = and i32 %t, 63 %res = shl i32 %val, %shamt @@ -52,6 +54,7 @@ ; X64-LABEL: t3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: sarw %cl, {{.*}}(%rip) ; X64-NEXT: retq %shamt = and i16 %t, 31 @@ -82,9 +85,10 @@ ; ; X64-LABEL: t4: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 63 %res = lshr i64 %val, %shamt @@ -112,9 +116,10 @@ ; ; X64-LABEL: t5: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrq %cl, %rsi ; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shamt = and i64 %t, 191 %res = lshr i64 %val, %shamt @@ -147,7 +152,8 @@ ; ; X64-LABEL: t5ptr: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NEXT: shrq %cl, (%rsi) ; X64-NEXT: retq %shamt = and i64 %t, 191 @@ -205,9 +211,9 @@ ; ; X64-LABEL: big_mask_constant: ; X64: # %bb.0: -; X64-NEXT: shrq $7, %rdi -; X64-NEXT: andl $134217728, %edi # imm = 0x8000000 ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $7, %rax +; X64-NEXT: andl $134217728, %eax # imm = 0x8000000 ; X64-NEXT: retq %and = and i64 %x, 17179869184 ; 0x400000000 %sh = lshr i64 %and, 7 Index: test/CodeGen/X86/shift-bmi2.ll =================================================================== --- test/CodeGen/X86/shift-bmi2.ll +++ test/CodeGen/X86/shift-bmi2.ll @@ -26,8 +26,8 @@ ; ; BMI264-LABEL: shl32i: ; BMI264: # %bb.0: -; BMI264-NEXT: shll $5, %edi ; BMI264-NEXT: movl %edi, %eax +; BMI264-NEXT: shll $5, %eax ; BMI264-NEXT: retq %shl = shl i32 %x, 5 ret i32 %shl @@ -69,6 +69,24 @@ } define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 4 +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64: ; BMI264: # %bb.0: ; BMI264-NEXT: shlxq %rsi, %rdi, %rax @@ -78,16 +96,43 @@ } define i64 @shl64i(i64 %x) nounwind uwtable readnone { +; BMI2-LABEL: shl64i: +; BMI2: # %bb.0: +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64i: ; BMI264: # %bb.0: -; BMI264-NEXT: shlq $7, %rdi ; BMI264-NEXT: movq %rdi, %rax +; BMI264-NEXT: shlq $7, %rax ; BMI264-NEXT: retq %shl = shl i64 %x, 7 ret i64 %shl } define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: shl64p: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl (%eax), %esi +; BMI2-NEXT: movl 4(%eax), %edx +; BMI2-NEXT: shldl %cl, %esi, %edx +; BMI2-NEXT: shlxl %ecx, %esi, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 4 +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64p: ; BMI264: # %bb.0: ; BMI264-NEXT: shlxq %rsi, (%rdi), %rax @@ -98,6 +143,15 @@ } define i64 @shl64pi(i64* %p) nounwind uwtable readnone { +; BMI2-LABEL: shl64pi: +; BMI2: # %bb.0: +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movl (%ecx), %eax +; BMI2-NEXT: movl 4(%ecx), %edx +; BMI2-NEXT: shldl $7, %eax, %edx +; BMI2-NEXT: shll $7, %eax +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64pi: ; BMI264: # %bb.0: ; BMI264-NEXT: movq (%rdi), %rax @@ -141,6 +195,24 @@ } define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 4 +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64: ; BMI264: # %bb.0: ; BMI264-NEXT: shrxq %rsi, %rdi, %rax @@ -150,6 +222,25 @@ } define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: lshr64p: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 4 +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64p: ; BMI264: # %bb.0: ; BMI264-NEXT: shrxq %rsi, (%rdi), %rax @@ -192,6 +283,24 @@ } define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: ashr64: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 4 +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64: ; BMI264: # %bb.0: ; BMI264-NEXT: sarxq %rsi, %rdi, %rax @@ -201,6 +310,25 @@ } define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +; BMI2-LABEL: ashr64p: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 8 +; BMI2-NEXT: .cfi_offset %esi, -8 +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: movl (%edx), %eax +; BMI2-NEXT: movl 4(%edx), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: .cfi_def_cfa_offset 4 +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64p: ; BMI264: # %bb.0: ; BMI264-NEXT: sarxq %rsi, (%rdi), %rax @@ -227,6 +355,21 @@ } define i64 @shl64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: shl64and: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shldl %cl, %eax, %edx +; BMI2-NEXT: shlxl %ecx, %eax, %esi +; BMI2-NEXT: xorl %eax, %eax +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %edx +; BMI2-NEXT: cmovel %esi, %eax +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: shl64and: ; BMI264: # %bb.0: ; BMI264-NEXT: shlxq %rdi, %rsi, %rax @@ -253,6 +396,21 @@ } define i64 @lshr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: lshr64and: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: shrxl %ecx, %edx, %esi +; BMI2-NEXT: xorl %edx, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: lshr64and: ; BMI264: # %bb.0: ; BMI264-NEXT: shrxq %rdi, %rsi, %rax @@ -279,6 +437,21 @@ } define i64 @ashr64and(i64 %t, i64 %val) nounwind { +; BMI2-LABEL: ashr64and: +; BMI2: # %bb.0: +; BMI2-NEXT: pushl %esi +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; BMI2-NEXT: shrdl %cl, %edx, %eax +; BMI2-NEXT: sarxl %ecx, %edx, %esi +; BMI2-NEXT: sarl $31, %edx +; BMI2-NEXT: testb $32, %cl +; BMI2-NEXT: cmovnel %esi, %eax +; BMI2-NEXT: cmovel %esi, %edx +; BMI2-NEXT: popl %esi +; BMI2-NEXT: retl +; ; BMI264-LABEL: ashr64and: ; BMI264: # %bb.0: ; BMI264-NEXT: sarxq %rdi, %rsi, %rax Index: test/CodeGen/X86/shift-double-x86_64.ll =================================================================== --- test/CodeGen/X86/shift-double-x86_64.ll +++ test/CodeGen/X86/shift-double-x86_64.ll @@ -6,10 +6,11 @@ define i64 @test1(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -22,10 +23,11 @@ define i64 @test2(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $63, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %and = and i64 %bits, 63 %and64 = sub i64 64, %and @@ -38,9 +40,10 @@ define i64 @test3(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = lshr i64 %lo, %bits64 @@ -52,9 +55,10 @@ define i64 @test4(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rdi, %rsi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rdi, %rax ; CHECK-NEXT: retq %bits64 = sub i64 64, %bits %sh_lo = shl i64 %hi, %bits64 @@ -66,9 +70,10 @@ define i64 @test5(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shldq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = lshr i64 %lo, 1 @@ -81,9 +86,10 @@ define i64 @test6(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = shl i64 %lo, 1 @@ -96,9 +102,10 @@ define i64 @test7(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shrdq %cl, %rsi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = add i64 %lo, %lo Index: test/CodeGen/X86/shift-double.ll =================================================================== --- test/CodeGen/X86/shift-double.ll +++ test/CodeGen/X86/shift-double.ll @@ -26,8 +26,9 @@ ; X64-LABEL: test1: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %shift.upgrd.1 = zext i8 %C to i64 ; [#uses=1] %Y = shl i64 %X, %shift.upgrd.1 ; [#uses=1] @@ -57,8 +58,9 @@ ; X64-LABEL: test2: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %shift.upgrd.2 = zext i8 %C to i64 ; [#uses=1] %Y = ashr i64 %X, %shift.upgrd.2 ; [#uses=1] @@ -87,8 +89,9 @@ ; X64-LABEL: test3: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %shift.upgrd.3 = zext i8 %C to i64 ; [#uses=1] %Y = lshr i64 %X, %shift.upgrd.3 ; [#uses=1] @@ -109,8 +112,9 @@ ; X64-LABEL: test4: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %shift.upgrd.4 = zext i8 %C to i32 ; [#uses=1] %X = shl i32 %A, %shift.upgrd.4 ; [#uses=1] @@ -133,8 +137,10 @@ ; X64-LABEL: test5: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldw %cl, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldw %cl, %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %shift.upgrd.6 = zext i8 %C to i16 ; [#uses=1] %X = shl i16 %A, %shift.upgrd.6 ; [#uses=1] @@ -159,8 +165,9 @@ ; X64-LABEL: test6: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %shift.upgrd.4 = zext i8 %C to i32 ; [#uses=1] %X = lshr i32 %A, %shift.upgrd.4 ; [#uses=1] @@ -183,8 +190,10 @@ ; X64-LABEL: test7: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdw %cl, %si, %di ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdw %cl, %si, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %shift.upgrd.6 = zext i8 %C to i16 ; [#uses=1] %X = lshr i16 %A, %shift.upgrd.6 ; [#uses=1] @@ -212,10 +221,11 @@ ; ; X64-LABEL: test8: ; X64: # %bb.0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shlq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -235,10 +245,11 @@ ; ; X64-LABEL: test9: ; X64: # %bb.0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: sarq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -258,10 +269,11 @@ ; ; X64-LABEL: test10: ; X64: # %bb.0: -; X64-NEXT: andb $31, %sil ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: shrq %cl, %rdi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andb $31, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrq %cl, %rax ; X64-NEXT: retq %and = and i32 %bits, 31 %sh_prom = zext i32 %and to i64 @@ -284,10 +296,11 @@ ; ; X64-LABEL: test11: ; X64: # %bb.0: -; X64-NEXT: andl $31, %edx ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $31, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %and = and i32 %bits, 31 %and32 = sub i32 32, %and @@ -310,10 +323,11 @@ ; ; X64-LABEL: test12: ; X64: # %bb.0: -; X64-NEXT: andl $31, %edx ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: andl $31, %ecx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %edi, %eax ; X64-NEXT: retq %and = and i32 %bits, 31 %and32 = sub i32 32, %and @@ -335,8 +349,9 @@ ; X64-LABEL: test13: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = sub i32 32, %bits %sh_lo = lshr i32 %lo, %bits32 @@ -357,8 +372,9 @@ ; X64-LABEL: test14: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %edi, %esi ; X64-NEXT: movl %esi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %edi, %eax ; X64-NEXT: retq %bits32 = sub i32 32, %bits %sh_lo = shl i32 %hi, %bits32 @@ -379,8 +395,9 @@ ; X64-LABEL: test15: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shldl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = lshr i32 %lo, 1 @@ -402,8 +419,9 @@ ; X64-LABEL: test16: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = shl i32 %lo, 1 @@ -425,8 +443,9 @@ ; X64-LABEL: test17: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shrdl %cl, %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrdl %cl, %esi, %eax ; X64-NEXT: retq %bits32 = xor i32 %bits, 31 %lo2 = add i32 %lo, %lo Index: test/CodeGen/X86/shift-pair.ll =================================================================== --- test/CodeGen/X86/shift-pair.ll +++ test/CodeGen/X86/shift-pair.ll @@ -4,9 +4,9 @@ define i64 @test(i64 %A) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: -; CHECK-NEXT: shrq $54, %rdi -; CHECK-NEXT: andl $-4, %edi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $54, %rax +; CHECK-NEXT: andl $-4, %eax ; CHECK-NEXT: retq %B = lshr i64 %A, 56 %C = shl i64 %B, 2 Index: test/CodeGen/X86/shuffle-of-insert.ll =================================================================== --- test/CodeGen/X86/shuffle-of-insert.ll +++ test/CodeGen/X86/shuffle-of-insert.ll @@ -6,15 +6,15 @@ define <4 x i32> @ins_elt_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_0: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_0: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $0, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $0, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_0: @@ -36,8 +36,8 @@ ; ; SSE4-LABEL: ins_elt_1: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $1, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $1, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_1: @@ -54,16 +54,16 @@ define <4 x i32> @ins_elt_2_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_2_commute: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_2_commute: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $2, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $2, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_2_commute: @@ -78,16 +78,16 @@ define <4 x i32> @ins_elt_3_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_3_commute: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_3_commute: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $3, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $3, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_3_commute: @@ -104,16 +104,16 @@ define <4 x i32> @ins_elt_0_to_2(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_0_to_2: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_0_to_2: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $2, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $2, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_0_to_2: @@ -128,15 +128,15 @@ define <4 x i32> @ins_elt_1_to_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_1_to_0: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_1_to_0: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $0, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $0, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_1_to_0: @@ -151,16 +151,16 @@ define <4 x i32> @ins_elt_2_to_3(i32 %x, <4 x i32> %v1, <4 x i32> %v2) { ; SSE2-LABEL: ins_elt_2_to_3: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSE2-NEXT: retq ; ; SSE4-LABEL: ins_elt_2_to_3: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $3, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $3, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_2_to_3: @@ -182,8 +182,8 @@ ; ; SSE4-LABEL: ins_elt_3_to_1: ; SSE4: # %bb.0: -; SSE4-NEXT: pinsrd $1, %edi, %xmm1 ; SSE4-NEXT: movdqa %xmm1, %xmm0 +; SSE4-NEXT: pinsrd $1, %edi, %xmm0 ; SSE4-NEXT: retq ; ; AVX-LABEL: ins_elt_3_to_1: Index: test/CodeGen/X86/signbit-shift.ll =================================================================== --- test/CodeGen/X86/signbit-shift.ll +++ test/CodeGen/X86/signbit-shift.ll @@ -6,9 +6,9 @@ define i32 @zext_ifpos(i32 %x) { ; CHECK-LABEL: zext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -57,9 +57,9 @@ define i32 @sext_ifpos(i32 %x) { ; CHECK-LABEL: sext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi -; CHECK-NEXT: sarl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: sarl $31, %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = sext i1 %c to i32 @@ -109,8 +109,8 @@ define i32 @zext_ifneg(i32 %x) { ; CHECK-LABEL: zext_ifneg: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 0 %r = zext i1 %c to i32 @@ -145,8 +145,8 @@ define i32 @sext_ifneg(i32 %x) { ; CHECK-LABEL: sext_ifneg: ; CHECK: # %bb.0: -; CHECK-NEXT: sarl $31, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $31, %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 0 %r = sext i1 %c to i32 @@ -231,9 +231,9 @@ define i32 @sub_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: sub_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: subl %edi, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: shrl $31, %edi +; CHECK-NEXT: subl %edi, %eax ; CHECK-NEXT: retq %sh = lshr i32 %x, 31 %r = sub i32 %y, %sh @@ -255,9 +255,9 @@ define i32 @sub_const_op_lshr(i32 %x) { ; CHECK-LABEL: sub_const_op_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: xorl $43, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: xorl $43, %eax ; CHECK-NEXT: retq %sh = lshr i32 %x, 31 %r = sub i32 43, %sh Index: test/CodeGen/X86/sret-implicit.ll =================================================================== --- test/CodeGen/X86/sret-implicit.ll +++ test/CodeGen/X86/sret-implicit.ll @@ -10,8 +10,8 @@ } ; X64-LABEL: sret_void -; X64-DAG: movl $0, (%rdi) ; X64-DAG: movq %rdi, %rax +; X64-DAG: movl $0, (%rdi) ; X64: retq ; X86-LABEL: sret_void @@ -24,8 +24,8 @@ } ; X64-LABEL: sret_demoted -; X64-DAG: movq $0, (%rdi) ; X64-DAG: movq %rdi, %rax +; X64-DAG: movq $0, (%rdi) ; X64: retq ; X86-LABEL: sret_demoted Index: test/CodeGen/X86/sse1.ll =================================================================== --- test/CodeGen/X86/sse1.ll +++ test/CodeGen/X86/sse1.ll @@ -190,26 +190,27 @@ ; ; X64-LABEL: PR30512: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorl %edi, %edi ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: sete %dil +; X64-NEXT: negl %edi +; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: xorl %edi, %edi ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: sete %dil +; X64-NEXT: negl %edi +; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: sete %cl +; X64-NEXT: negl %ecx +; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) +; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpl %r9d, %esi -; X64-NEXT: sete %al -; X64-NEXT: negl %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: sete %cl +; X64-NEXT: negl %ecx +; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] @@ -218,8 +219,7 @@ ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; X64-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; X64-NEXT: andps {{.*}}(%rip), %xmm2 -; X64-NEXT: movaps %xmm2, (%rdi) -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movaps %xmm2, (%rax) ; X64-NEXT: retq %cmp = icmp eq <4 x i32> %x, %y %zext = zext <4 x i1> %cmp to <4 x i32> Index: test/CodeGen/X86/sse3-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse3-intrinsics-x86.ll +++ test/CodeGen/X86/sse3-intrinsics-x86.ll @@ -143,8 +143,8 @@ ; ; X64-LABEL: monitor: ; X64: ## %bb.0: -; X64-NEXT: leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07] ; X64-NEXT: movl %esi, %ecx ## encoding: [0x89,0xf1] +; X64-NEXT: leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07] ; X64-NEXT: monitor ## encoding: [0x0f,0x01,0xc8] ; X64-NEXT: retq ## encoding: [0xc3] tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) @@ -162,8 +162,8 @@ ; ; X64-LABEL: mwait: ; X64: ## %bb.0: -; X64-NEXT: movl %edi, %ecx ## encoding: [0x89,0xf9] ; X64-NEXT: movl %esi, %eax ## encoding: [0x89,0xf0] +; X64-NEXT: movl %edi, %ecx ## encoding: [0x89,0xf9] ; X64-NEXT: mwait ## encoding: [0x0f,0x01,0xc9] ; X64-NEXT: retq ## encoding: [0xc3] tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) Index: test/CodeGen/X86/sse3-schedule.ll =================================================================== --- test/CodeGen/X86/sse3-schedule.ll +++ test/CodeGen/X86/sse3-schedule.ll @@ -768,120 +768,120 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; GENERIC-LABEL: test_monitor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; GENERIC-NEXT: monitor # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_monitor: ; ATOM: # %bb.0: -; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; ATOM-NEXT: movl %esi, %ecx # sched: [1:0.50] +; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; ATOM-NEXT: monitor # sched: [45:22.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_monitor: ; SLM: # %bb.0: -; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50] +; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] ; SLM-NEXT: monitor # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_monitor: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33] +; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-SSE-NEXT: monitor # sched: [100:0.33] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_monitor: ; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33] +; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SANDY-NEXT: monitor # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_monitor: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-SSE-NEXT: monitor # sched: [100:0.25] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_monitor: ; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: monitor # sched: [100:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_monitor: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_monitor: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: monitor # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_monitor: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: monitor # sched: [100:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_monitor: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-SSE-NEXT: monitor # sched: [100:0.25] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_monitor: ; SKX: # %bb.0: -; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: monitor # sched: [100:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_monitor: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-SSE-NEXT: monitor # sched: [100:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_monitor: ; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BTVER2-NEXT: monitor # sched: [100:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_monitor: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-SSE-NEXT: monitor # sched: [100:0.25] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_monitor: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] ; ZNVER1-NEXT: monitor # sched: [100:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2) @@ -1273,120 +1273,120 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; GENERIC-LABEL: test_mwait: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] ; GENERIC-NEXT: mwait # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mwait: ; ATOM: # %bb.0: -; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50] +; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; ATOM-NEXT: mwait # sched: [46:23.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_mwait: ; SLM: # %bb.0: -; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50] +; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] ; SLM-NEXT: mwait # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_mwait: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-SSE-NEXT: mwait # sched: [100:0.33] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_mwait: ; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33] +; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] ; SANDY-NEXT: mwait # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_mwait: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-SSE-NEXT: mwait # sched: [20:2.50] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_mwait: ; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; HASWELL-NEXT: mwait # sched: [20:2.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_mwait: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_mwait: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: mwait # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_mwait: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: mwait # sched: [20:2.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_mwait: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-SSE-NEXT: mwait # sched: [20:2.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mwait: ; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: mwait # sched: [20:2.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_mwait: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50] ; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50] ; BTVER2-SSE-NEXT: mwait # sched: [100:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_mwait: ; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50] +; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] ; BTVER2-NEXT: mwait # sched: [100:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_mwait: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-SSE-NEXT: mwait # sched: [100:0.25] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_mwait: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] ; ZNVER1-NEXT: mwait # sched: [100:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1) Index: test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll @@ -19,8 +19,8 @@ define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ ; CHECK-LABEL: test_mm_crc64_u64: ; CHECK: # %bb.0: -; CHECK-NEXT: crc32q %rsi, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: crc32q %rsi, %rax ; CHECK-NEXT: retq %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) ret i64 %res Index: test/CodeGen/X86/sse42-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -35,22 +35,22 @@ ; ; X64-SSE-LABEL: test_mm_cmpestra: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %r8d, %r8d -; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: movl %esi, %edx +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %esi ; X64-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-SSE-NEXT: seta %r8b -; X64-SSE-NEXT: movl %r8d, %eax +; X64-SSE-NEXT: seta %sil +; X64-SSE-NEXT: movl %esi, %eax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: test_mm_cmpestra: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: xorl %r8d, %r8d -; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: movl %esi, %edx +; X64-AVX-NEXT: movl %edi, %eax +; X64-AVX-NEXT: xorl %esi, %esi ; X64-AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; X64-AVX-NEXT: seta %r8b -; X64-AVX-NEXT: movl %r8d, %eax +; X64-AVX-NEXT: seta %sil +; X64-AVX-NEXT: movl %esi, %eax ; X64-AVX-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -86,22 +86,22 @@ ; ; X64-SSE-LABEL: test_mm_cmpestrc: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %r8d, %r8d -; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: movl %esi, %edx +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %esi ; X64-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-SSE-NEXT: setb %r8b -; X64-SSE-NEXT: movl %r8d, %eax +; X64-SSE-NEXT: setb %sil +; X64-SSE-NEXT: movl %esi, %eax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: test_mm_cmpestrc: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: xorl %r8d, %r8d -; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: movl %esi, %edx +; X64-AVX-NEXT: movl %edi, %eax +; X64-AVX-NEXT: xorl %esi, %esi ; X64-AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; X64-AVX-NEXT: setb %r8b -; X64-AVX-NEXT: movl %r8d, %eax +; X64-AVX-NEXT: setb %sil +; X64-AVX-NEXT: movl %esi, %eax ; X64-AVX-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -129,16 +129,16 @@ ; ; X64-SSE-LABEL: test_mm_cmpestri: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: movl %esi, %edx +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 ; X64-SSE-NEXT: movl %ecx, %eax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: test_mm_cmpestri: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: movl %esi, %edx +; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 ; X64-AVX-NEXT: movl %ecx, %eax ; X64-AVX-NEXT: retq @@ -166,15 +166,15 @@ ; ; X64-SSE-LABEL: test_mm_cmpestrm: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: movl %esi, %edx +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: test_mm_cmpestrm: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: movl %esi, %edx +; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: vpcmpestrm $7, %xmm1, %xmm0 ; X64-AVX-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> @@ -212,22 +212,22 @@ ; ; X64-SSE-LABEL: test_mm_cmpestro: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %r8d, %r8d -; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: movl %esi, %edx +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %esi ; X64-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-SSE-NEXT: seto %r8b -; X64-SSE-NEXT: movl %r8d, %eax +; X64-SSE-NEXT: seto %sil +; X64-SSE-NEXT: movl %esi, %eax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: test_mm_cmpestro: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: xorl %r8d, %r8d -; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: movl %esi, %edx +; X64-AVX-NEXT: movl %edi, %eax +; X64-AVX-NEXT: xorl %esi, %esi ; X64-AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; X64-AVX-NEXT: seto %r8b -; X64-AVX-NEXT: movl %r8d, %eax +; X64-AVX-NEXT: seto %sil +; X64-AVX-NEXT: movl %esi, %eax ; X64-AVX-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -263,22 +263,22 @@ ; ; X64-SSE-LABEL: test_mm_cmpestrs: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %r8d, %r8d -; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: movl %esi, %edx +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %esi ; X64-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-SSE-NEXT: sets %r8b -; X64-SSE-NEXT: movl %r8d, %eax +; X64-SSE-NEXT: sets %sil +; X64-SSE-NEXT: movl %esi, %eax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: test_mm_cmpestrs: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: xorl %r8d, %r8d -; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: movl %esi, %edx +; X64-AVX-NEXT: movl %edi, %eax +; X64-AVX-NEXT: xorl %esi, %esi ; X64-AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; X64-AVX-NEXT: sets %r8b -; X64-AVX-NEXT: movl %r8d, %eax +; X64-AVX-NEXT: sets %sil +; X64-AVX-NEXT: movl %esi, %eax ; X64-AVX-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -314,22 +314,22 @@ ; ; X64-SSE-LABEL: test_mm_cmpestrz: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %r8d, %r8d -; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: movl %esi, %edx +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %esi ; X64-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-SSE-NEXT: sete %r8b -; X64-SSE-NEXT: movl %r8d, %eax +; X64-SSE-NEXT: sete %sil +; X64-SSE-NEXT: movl %esi, %eax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: test_mm_cmpestrz: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: xorl %r8d, %r8d -; X64-AVX-NEXT: movl %edi, %eax ; X64-AVX-NEXT: movl %esi, %edx +; X64-AVX-NEXT: movl %edi, %eax +; X64-AVX-NEXT: xorl %esi, %esi ; X64-AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; X64-AVX-NEXT: sete %r8b -; X64-AVX-NEXT: movl %r8d, %eax +; X64-AVX-NEXT: sete %sil +; X64-AVX-NEXT: movl %esi, %eax ; X64-AVX-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -510,8 +510,8 @@ ; ; X64-LABEL: test_mm_crc32_u8: ; X64: # %bb.0: -; X64-NEXT: crc32b %sil, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32b %sil, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) ret i32 %res @@ -527,8 +527,8 @@ ; ; X64-LABEL: test_mm_crc32_u16: ; X64: # %bb.0: -; X64-NEXT: crc32w %si, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32w %si, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) ret i32 %res @@ -544,8 +544,8 @@ ; ; X64-LABEL: test_mm_crc32_u32: ; X64: # %bb.0: -; X64-NEXT: crc32l %esi, %edi ; X64-NEXT: movl %edi, %eax +; X64-NEXT: crc32l %esi, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) ret i32 %res Index: test/CodeGen/X86/sse42-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-x86.ll +++ test/CodeGen/X86/sse42-intrinsics-x86.ll @@ -626,8 +626,8 @@ ; ; X64-LABEL: crc32_32_8: ; X64: ## %bb.0: -; X64-NEXT: crc32b %sil, %edi ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) ret i32 %tmp @@ -643,8 +643,8 @@ ; ; X64-LABEL: crc32_32_16: ; X64: ## %bb.0: -; X64-NEXT: crc32w %si, %edi ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xfe] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) ret i32 %tmp @@ -660,8 +660,8 @@ ; ; X64-LABEL: crc32_32_32: ; X64: ## %bb.0: -; X64-NEXT: crc32l %esi, %edi ## encoding: [0xf2,0x0f,0x38,0xf1,0xfe] ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) ret i32 %tmp Index: test/CodeGen/X86/sse42-intrinsics-x86_64.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-x86_64.ll +++ test/CodeGen/X86/sse42-intrinsics-x86_64.ll @@ -9,8 +9,8 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { ; CHECK-LABEL: crc32_64_8: ; CHECK: ## %bb.0: -; CHECK-NEXT: crc32b %sil, %edi ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) ret i64 %tmp @@ -19,8 +19,8 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: crc32_64_64: ; CHECK: ## %bb.0: -; CHECK-NEXT: crc32q %rsi, %rdi ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xfe] ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp Index: test/CodeGen/X86/sse42-schedule.ll =================================================================== --- test/CodeGen/X86/sse42-schedule.ll +++ test/CodeGen/X86/sse42-schedule.ll @@ -21,114 +21,114 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; GENERIC-LABEL: crc32_32_8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_8: ; SLM: # %bb.0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: crc32_32_8: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: crc32_32_8: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: crc32_32_8: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: crc32_32_8: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_32_8: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: crc32_32_8: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: crc32_32_8: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_8: ; SKX: # %bb.0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: crc32_32_8: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: crc32b %sil, %edi # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32b (%rdx), %edi # sched: [6:2.00] ; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:2.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [6:2.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: crc32_32_8: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: crc32_32_8: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -140,114 +140,114 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; GENERIC-LABEL: crc32_32_16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32w %si, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32w %si, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_16: ; SLM: # %bb.0: -; SLM-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SLM-NEXT: crc32w (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SLM-NEXT: crc32w (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: crc32_32_16: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: crc32_32_16: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: crc32_32_16: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: crc32_32_16: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_32_16: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: crc32_32_16: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: crc32_32_16: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKX-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_16: ; SKX: # %bb.0: -; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SKX-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32w %si, %eax # sched: [3:1.00] +; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: crc32_32_16: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: crc32w %si, %edi # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32w (%rdx), %edi # sched: [6:2.00] ; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: crc32w %si, %eax # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [6:2.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32w %si, %edi # sched: [3:2.00] -; BTVER2-NEXT: crc32w (%rdx), %edi # sched: [6:2.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: crc32w %si, %eax # sched: [3:2.00] +; BTVER2-NEXT: crc32w (%rdx), %eax # sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: crc32_32_16: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32w (%rdx), %edi # sched: [10:1.00] ; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32w (%rdx), %eax # sched: [10:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: crc32_32_16: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32w (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32w %si, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32w (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) %2 = load i16, i16 *%a2 @@ -259,114 +259,114 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: crc32_32_32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_32: ; SLM: # %bb.0: -; SLM-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SLM-NEXT: crc32l (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SLM-NEXT: crc32l (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: crc32_32_32: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: crc32_32_32: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: crc32_32_32: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: crc32_32_32: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_32_32: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: crc32_32_32: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: crc32_32_32: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKX-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_32: ; SKX: # %bb.0: -; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SKX-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: crc32_32_32: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: crc32l %esi, %edi # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32l (%rdx), %edi # sched: [6:2.00] ; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: crc32l %esi, %eax # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [6:2.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:2.00] -; BTVER2-NEXT: crc32l (%rdx), %edi # sched: [6:2.00] ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-NEXT: crc32l %esi, %eax # sched: [3:2.00] +; BTVER2-NEXT: crc32l (%rdx), %eax # sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: crc32_32_32: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32l (%rdx), %edi # sched: [10:1.00] ; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32l (%rdx), %eax # sched: [10:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: crc32_32_32: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32l (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-NEXT: crc32l %esi, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32l (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) %2 = load i32, i32 *%a2 @@ -378,114 +378,114 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; GENERIC-LABEL: crc32_64_8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_8: ; SLM: # %bb.0: -; SLM-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %edi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: crc32_64_8: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: crc32_64_8: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: crc32_64_8: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: crc32_64_8: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_64_8: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: crc32_64_8: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: crc32_64_8: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_8: ; SKX: # %bb.0: -; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: crc32_64_8: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: crc32b %sil, %edi # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32b (%rdx), %edi # sched: [6:2.00] ; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:2.00] -; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [6:2.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: crc32_64_8: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: crc32_64_8: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] +; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1) %2 = load i8, i8 *%a2 @@ -497,114 +497,114 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: crc32_64_64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; GENERIC-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_64: ; SLM: # %bb.0: -; SLM-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SLM-NEXT: crc32q (%rdx), %rdi # sched: [6:1.00] ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SLM-NEXT: crc32q (%rdx), %rax # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: crc32_64_64: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: crc32_64_64: ; SANDY: # %bb.0: -; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SANDY-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: crc32_64_64: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # %bb.0: -; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; HASWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: crc32_64_64: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: crc32_64_64: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: crc32_64_64: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKYLAKE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: crc32_64_64: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKX-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKX-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_64: ; SKX: # %bb.0: -; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SKX-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: crc32_64_64: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32q (%rdx), %rdi # sched: [6:2.00] ; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [6:2.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # %bb.0: -; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:2.00] -; BTVER2-NEXT: crc32q (%rdx), %rdi # sched: [6:2.00] ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-NEXT: crc32q %rsi, %rax # sched: [3:2.00] +; BTVER2-NEXT: crc32q (%rdx), %rax # sched: [6:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: crc32_64_64: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00] ; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32q (%rdx), %rax # sched: [10:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: crc32_64_64: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; ZNVER1-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00] ; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-NEXT: crc32q %rsi, %rax # sched: [3:1.00] +; ZNVER1-NEXT: crc32q (%rdx), %rax # sched: [10:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) %2 = load i64, i64 *%a2 Index: test/CodeGen/X86/sttni.ll =================================================================== --- test/CodeGen/X86/sttni.ll +++ test/CodeGen/X86/sttni.ll @@ -20,8 +20,8 @@ ; ; X64-LABEL: pcmpestri_reg_eq_i8: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 ; X64-NEXT: setae %al ; X64-NEXT: retq @@ -42,8 +42,8 @@ ; ; X64-LABEL: pcmpestri_reg_idx_i8: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq @@ -81,8 +81,8 @@ ; ; X64-LABEL: pcmpestri_reg_diff_i8: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 ; X64-NEXT: # kill: def $ecx killed $ecx def $rcx ; X64-NEXT: cmpl $16, %ecx @@ -133,8 +133,8 @@ ; X64-LABEL: pcmpestri_mem_eq_i8: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %ecx, %edx ; X64-NEXT: pcmpestri $24, (%r8), %xmm0 ; X64-NEXT: setae %al @@ -166,8 +166,8 @@ ; X64-LABEL: pcmpestri_mem_idx_i8: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %ecx, %edx ; X64-NEXT: pcmpestri $24, (%r8), %xmm0 ; X64-NEXT: movl %ecx, %eax @@ -216,9 +216,9 @@ ; ; X64-LABEL: pcmpestri_mem_diff_i8: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %eax ; X64-NEXT: movdqu (%rdi), %xmm1 ; X64-NEXT: movdqu (%rdx), %xmm0 -; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl %ecx, %edx ; X64-NEXT: pcmpestri $24, %xmm0, %xmm1 ; X64-NEXT: # kill: def $ecx killed $ecx def $rcx @@ -268,8 +268,8 @@ ; ; X64-LABEL: pcmpestri_reg_eq_i16: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 ; X64-NEXT: setae %al ; X64-NEXT: retq @@ -292,8 +292,8 @@ ; ; X64-LABEL: pcmpestri_reg_idx_i16: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq @@ -334,8 +334,8 @@ ; ; X64-LABEL: pcmpestri_reg_diff_i16: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 ; X64-NEXT: # kill: def $ecx killed $ecx def $rcx ; X64-NEXT: cmpl $16, %ecx @@ -388,8 +388,8 @@ ; X64-LABEL: pcmpestri_mem_eq_i16: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %ecx, %edx ; X64-NEXT: pcmpestri $25, (%r8), %xmm0 ; X64-NEXT: setae %al @@ -423,8 +423,8 @@ ; X64-LABEL: pcmpestri_mem_idx_i16: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %esi, %eax +; X64-NEXT: movdqu (%rdi), %xmm0 ; X64-NEXT: movl %ecx, %edx ; X64-NEXT: pcmpestri $25, (%r8), %xmm0 ; X64-NEXT: movl %ecx, %eax @@ -476,9 +476,9 @@ ; ; X64-LABEL: pcmpestri_mem_diff_i16: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %eax ; X64-NEXT: movdqu (%rdi), %xmm1 ; X64-NEXT: movdqu (%rdx), %xmm0 -; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl %ecx, %edx ; X64-NEXT: pcmpestri $25, %xmm0, %xmm1 ; X64-NEXT: # kill: def $ecx killed $ecx def $rcx @@ -989,13 +989,13 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: xorl %r10d, %r10d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X64-NEXT: setb %r10b +; X64-NEXT: setb %sil ; X64-NEXT: movl %ecx, (%r9) -; X64-NEXT: movl %r10d, (%r8) +; X64-NEXT: movl %esi, (%r8) ; X64-NEXT: retq entry: %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) @@ -1026,13 +1026,13 @@ ; X64-LABEL: pcmpestr_mask_flag: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: xorl %r9d, %r9d -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 -; X64-NEXT: setb %r9b +; X64-NEXT: setb %sil ; X64-NEXT: movdqa %xmm0, (%r8) -; X64-NEXT: movl %r9d, (%rcx) +; X64-NEXT: movl %esi, (%rcx) ; X64-NEXT: retq entry: %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) @@ -1064,9 +1064,9 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movdqa %xmm0, %xmm2 -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movdqa %xmm0, %xmm2 ; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 ; X64-NEXT: pcmpestri $24, %xmm1, %xmm2 ; X64-NEXT: movdqa %xmm0, (%r9) @@ -1110,9 +1110,9 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rcx, %r9 ; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movdqa %xmm0, %xmm2 -; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movdqa %xmm0, %xmm2 ; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 ; X64-NEXT: xorl %esi, %esi ; X64-NEXT: pcmpestri $24, %xmm1, %xmm2 @@ -1321,9 +1321,9 @@ ; ; X64-LABEL: pcmpestri_nontemporal: ; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax ; X64-NEXT: movntdqa (%rsi), %xmm1 ; X64-NEXT: xorl %esi, %esi -; X64-NEXT: movl %edi, %eax ; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 ; X64-NEXT: setb %sil ; X64-NEXT: movl %esi, %eax Index: test/CodeGen/X86/subcarry.ll =================================================================== --- test/CodeGen/X86/subcarry.ll +++ test/CodeGen/X86/subcarry.ll @@ -4,9 +4,9 @@ define i128 @sub128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: sub128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq %rdx, %rdi -; CHECK-NEXT: sbbq %rcx, %rsi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: subq %rdx, %rax +; CHECK-NEXT: sbbq %rcx, %rsi ; CHECK-NEXT: movq %rsi, %rdx ; CHECK-NEXT: retq entry: @@ -17,6 +17,7 @@ define i256 @sub256(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: sub256: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: subq %r9, %rsi ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx @@ -25,7 +26,6 @@ ; CHECK-NEXT: movq %rsi, (%rdi) ; CHECK-NEXT: movq %rcx, 16(%rdi) ; CHECK-NEXT: movq %r8, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %0 = sub i256 %a, %b @@ -37,19 +37,19 @@ define %S @negate(%S* nocapture readonly %this) { ; CHECK-LABEL: negate: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: xorl %r8d, %r8d -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: subq (%rsi), %rcx -; CHECK-NEXT: movl $0, %edx -; CHECK-NEXT: sbbq 8(%rsi), %rdx -; CHECK-NEXT: movl $0, %eax -; CHECK-NEXT: sbbq 16(%rsi), %rax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: subq (%rsi), %rdx +; CHECK-NEXT: movl $0, %edi +; CHECK-NEXT: sbbq 8(%rsi), %rdi +; CHECK-NEXT: movl $0, %ecx +; CHECK-NEXT: sbbq 16(%rsi), %rcx ; CHECK-NEXT: sbbq 24(%rsi), %r8 -; CHECK-NEXT: movq %rcx, (%rdi) -; CHECK-NEXT: movq %rdx, 8(%rdi) -; CHECK-NEXT: movq %rax, 16(%rdi) -; CHECK-NEXT: movq %r8, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rdi, 8(%rax) +; CHECK-NEXT: movq %rcx, 16(%rax) +; CHECK-NEXT: movq %r8, 24(%rax) ; CHECK-NEXT: retq entry: %0 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0 @@ -90,29 +90,29 @@ define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: notq %rdx -; CHECK-NEXT: xorl %r10d, %r10d +; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: setb %r10b +; CHECK-NEXT: setb %dil ; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: adcq 8(%rsi), %r10 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %r11d +; CHECK-NEXT: adcq 8(%rsi), %rdi +; CHECK-NEXT: setb %r10b +; CHECK-NEXT: movzbl %r10b, %r10d ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: addq %r10, %rcx -; CHECK-NEXT: adcq 16(%rsi), %r11 -; CHECK-NEXT: setb %al -; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: adcq 16(%rsi), %r10 +; CHECK-NEXT: setb %dil +; CHECK-NEXT: movzbl %dil, %edi ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %r11, %r8 -; CHECK-NEXT: adcq 24(%rsi), %rax +; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: adcq 24(%rsi), %rdi ; CHECK-NEXT: notq %r9 -; CHECK-NEXT: addq %rax, %r9 -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r8, 16(%rdi) -; CHECK-NEXT: movq %r9, 24(%rdi) -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: addq %rdi, %r9 +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/swift-return.ll =================================================================== --- test/CodeGen/X86/swift-return.ll +++ test/CodeGen/X86/swift-return.ll @@ -457,18 +457,18 @@ ; CHECK-LABEL: gen9: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: movl %edi, %r8d +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %eax, %r8d ; CHECK-NEXT: retq ; ; CHECK-O0-LABEL: gen9: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb %dil, %al -; CHECK-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) # 1-byte Spill -; CHECK-O0-NEXT: movb -{{[0-9]+}}(%rsp), %dl # 1-byte Reload -; CHECK-O0-NEXT: movb -{{[0-9]+}}(%rsp), %cl # 1-byte Reload -; CHECK-O0-NEXT: movb -{{[0-9]+}}(%rsp), %r8b # 1-byte Reload +; CHECK-O0-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %dl # 1-byte Reload +; CHECK-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload +; CHECK-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %r8b # 1-byte Reload ; CHECK-O0-NEXT: retq %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 @@ -479,10 +479,10 @@ define swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen10(double %keyd, i64 %keyi) { ; CHECK-LABEL: gen10: ; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movaps %xmm0, %xmm1 ; CHECK-NEXT: movaps %xmm0, %xmm2 ; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movq %rdi, %rdx ; CHECK-NEXT: movq %rdi, %rcx ; CHECK-NEXT: movq %rdi, %r8 @@ -490,12 +490,12 @@ ; ; CHECK-O0-LABEL: gen10: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) # 8-byte Spill -; CHECK-O0-NEXT: movsd -{{[0-9]+}}(%rsp), %xmm1 # 8-byte Reload +; CHECK-O0-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload ; CHECK-O0-NEXT: # xmm1 = mem[0],zero -; CHECK-O0-NEXT: movsd -{{[0-9]+}}(%rsp), %xmm2 # 8-byte Reload +; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Reload ; CHECK-O0-NEXT: # xmm2 = mem[0],zero -; CHECK-O0-NEXT: movsd -{{[0-9]+}}(%rsp), %xmm3 # 8-byte Reload +; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Reload ; CHECK-O0-NEXT: # xmm3 = mem[0],zero ; CHECK-O0-NEXT: movq %rdi, %rax ; CHECK-O0-NEXT: movq %rdi, %rdx Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s -; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-O0 %s -; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin -O0 | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc < %s -verify-machineinstrs -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s declare i8* @malloc(i64) declare void @free(i8*) @@ -37,8 +37,7 @@ ; CHECK-APPLE: testq %r12, %r12 ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller: @@ -250,9 +249,8 @@ ; CHECK-APPLE: testq %r12, %r12 ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12), +; CHECK-APPLE: movb 8(%rdi), ; CHECK-APPLE: movb %{{.*}}, -; CHECK-APPLE: movq %r12, %rdi ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller3: @@ -300,8 +298,7 @@ ; CHECK-APPLE: testq %r12, %r12 ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; The second swifterror value: @@ -310,8 +307,7 @@ ; CHECK-APPLE: testq %r12, %r12 ; CHECK-APPLE: jne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: movb 8(%r12) -; CHECK-APPLE: movq %r12, %rdi +; CHECK-APPLE: movb 8(%rdi) ; CHECK-APPLE: callq {{.*}}free ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values: @@ -488,8 +484,8 @@ ; CHECK-i386: retl ; CHECK-APPLE-LABEL: empty_swiftcc: ; CHECK-APPLE: movl %edx, %ecx -; CHECK-APPLE: movl %edi, %eax -; CHECK-APPLE: movl %esi, %edx +; CHECK-APPLE-DAG: movl %edi, %eax +; CHECK-APPLE-DAG: movl %esi, %edx ; CHECK-APPLE: retq define swiftcc {i32, i32, i32} @empty_swiftcc({i32, i32, i32} , %swift_error** swifterror %error_ptr_ref) { entry: Index: test/CodeGen/X86/system-intrinsics-xsetbv.ll =================================================================== --- test/CodeGen/X86/system-intrinsics-xsetbv.ll +++ test/CodeGen/X86/system-intrinsics-xsetbv.ll @@ -11,8 +11,8 @@ ; CHECK64-LABEL: test_xsetbv ; CHECK64: movl %edx, %eax -; CHECK64: movl %edi, %ecx -; CHECK64: movl %esi, %edx +; CHECK64-DAG: movl %edi, %ecx +; CHECK64-DAG: movl %esi, %edx ; CHECK64: xsetbv ; CHECK64: ret Index: test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll @@ -40,10 +40,10 @@ ; X64-LABEL: test__blcic_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -89,10 +89,10 @@ ; X64-LABEL: test__blsic_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 @@ -104,10 +104,10 @@ ; X64-LABEL: test__t1mskc_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: addq $1, %rdi -; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: addq $1, %rax +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = add i64 %a0, 1 @@ -119,10 +119,10 @@ ; X64-LABEL: test__tzmsk_u64: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorq $-1, %rax -; X64-NEXT: subq $1, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: xorq $-1, %rcx +; X64-NEXT: subq $1, %rax +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: retq %1 = xor i64 %a0, -1 %2 = sub i64 %a0, 1 Index: test/CodeGen/X86/tbm-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/tbm-intrinsics-fast-isel.ll +++ test/CodeGen/X86/tbm-intrinsics-fast-isel.ll @@ -72,10 +72,10 @@ ; X64-LABEL: test__blcic_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -154,10 +154,10 @@ ; X64-LABEL: test__blsic_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 @@ -178,10 +178,10 @@ ; X64-LABEL: test__t1mskc_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: addl $1, %edi -; X64-NEXT: orl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: addl $1, %eax +; X64-NEXT: orl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = add i32 %a0, 1 @@ -202,10 +202,10 @@ ; X64-LABEL: test__tzmsk_u32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl $-1, %eax -; X64-NEXT: subl $1, %edi -; X64-NEXT: andl %eax, %edi -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: xorl $-1, %ecx +; X64-NEXT: subl $1, %eax +; X64-NEXT: andl %ecx, %eax ; X64-NEXT: retq %1 = xor i32 %a0, -1 %2 = sub i32 %a0, 1 Index: test/CodeGen/X86/tbm_patterns.ll =================================================================== --- test/CodeGen/X86/tbm_patterns.ll +++ test/CodeGen/X86/tbm_patterns.ll @@ -52,10 +52,10 @@ define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2: ; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovnel %edx, %esi -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 %t1 = and i32 %t0, 4095 @@ -113,10 +113,10 @@ define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2: ; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: shrl $4, %edi ; CHECK-NEXT: testl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovneq %rdx, %rsi -; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 %t1 = and i64 %t0, 4095 @@ -151,11 +151,11 @@ define i32 @test_x86_tbm_blcfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: testl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = and i32 %t0, %a @@ -190,10 +190,10 @@ define i64 @test_x86_tbm_blcfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = and i64 %t0, %a @@ -230,12 +230,12 @@ define i32 @test_x86_tbm_blci_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 1, %a %t1 = xor i32 %t0, -1 @@ -273,11 +273,11 @@ define i64 @test_x86_tbm_blci_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 1, %a %t1 = xor i64 %t0, -1 @@ -335,12 +335,12 @@ define i32 @test_x86_tbm_blcic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: testl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; CHECK-NEXT: testl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -378,12 +378,12 @@ define i64 @test_x86_tbm_blcic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: testq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: testq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -419,11 +419,11 @@ define i32 @test_x86_tbm_blcmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: xorl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: xorl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = xor i32 %t0, %a @@ -458,10 +458,10 @@ define i64 @test_x86_tbm_blcmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: xorq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: xorq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = xor i64 %t0, %a @@ -496,11 +496,11 @@ define i32 @test_x86_tbm_blcs_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 %t1 = or i32 %t0, %a @@ -535,10 +535,10 @@ define i64 @test_x86_tbm_blcs_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq 1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 %t1 = or i64 %t0, %a @@ -573,11 +573,11 @@ define i32 @test_x86_tbm_blsfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -1(%rdi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, -1 %t1 = or i32 %t0, %a @@ -612,10 +612,10 @@ define i64 @test_x86_tbm_blsfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq -1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: leaq -1(%rdi), %rcx +; CHECK-NEXT: orq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, -1 %t1 = or i64 %t0, %a @@ -652,12 +652,12 @@ define i32 @test_x86_tbm_blsic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -695,12 +695,12 @@ define i64 @test_x86_tbm_blsic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 @@ -739,12 +739,12 @@ define i32 @test_x86_tbm_t1mskc_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: incl %edi +; CHECK-NEXT: orl %ecx, %edi +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -783,12 +783,12 @@ define i64 @test_x86_tbm_t1mskc_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: orq %rcx, %rdi +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -827,12 +827,12 @@ define i32 @test_x86_tbm_tzmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: testl %edi, %eax -; CHECK-NEXT: cmovnel %edx, %esi ; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: decl %edi +; CHECK-NEXT: testl %edi, %ecx +; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -871,12 +871,12 @@ define i64 @test_x86_tbm_tzmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: testq %rdi, %rax -; CHECK-NEXT: cmovneq %rdx, %rsi ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: decq %rdi +; CHECK-NEXT: testq %rdi, %rcx +; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 Index: test/CodeGen/X86/trunc-subvector.ll =================================================================== --- test/CodeGen/X86/trunc-subvector.ll +++ test/CodeGen/X86/trunc-subvector.ll @@ -41,9 +41,8 @@ ; SSE2-LABEL: test3: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; AVX2-LABEL: test3: @@ -165,9 +164,9 @@ define <2 x i32> @test8(<8 x i32> %v) { ; SSE2-LABEL: test8: ; SSE2: # %bb.0: -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; AVX2-LABEL: test8: Index: test/CodeGen/X86/twoaddr-lea.ll =================================================================== --- test/CodeGen/X86/twoaddr-lea.ll +++ test/CodeGen/X86/twoaddr-lea.ll @@ -11,8 +11,8 @@ define i32 @test1(i32 %X) nounwind { ; CHECK-LABEL: test1: -; CHECK-NOT: mov -; CHECK: leal 1(%rdi) +; CHECK: movl %edi, %eax +; CHECK: leal 1(%rax) %Z = add i32 %X, 1 store volatile i32 %Z, i32* @G ret i32 %X Index: test/CodeGen/X86/umul-with-overflow.ll =================================================================== --- test/CodeGen/X86/umul-with-overflow.ll +++ test/CodeGen/X86/umul-with-overflow.ll @@ -15,8 +15,8 @@ ; ; X64-LABEL: a: ; X64: # %bb.0: -; X64-NEXT: movl $3, %ecx ; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl $3, %ecx ; X64-NEXT: mull %ecx ; X64-NEXT: seto %al ; X64-NEXT: retq Index: test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -10,18 +10,20 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andb $60, %dil -; CHECK-NOBMI-NEXT: andb $-61, %sil -; CHECK-NOBMI-NEXT: orb %dil, %sil ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andb $60, %dil +; CHECK-NOBMI-NEXT: andb $-61, %al +; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andb $60, %dil -; CHECK-BMI-NEXT: andb $-61, %sil -; CHECK-BMI-NEXT: orb %dil, %sil ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andb $60, %dil +; CHECK-BMI-NEXT: andb $-61, %al +; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 60 %my = and i8 %y, -61 @@ -110,18 +112,20 @@ define i8 @in8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: in8_constmask: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %esi, %eax ; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: andb $60, %dil -; CHECK-NOBMI-NEXT: xorb %dil, %sil -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in8_constmask: ; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movl %esi, %eax ; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: andb $60, %dil -; CHECK-BMI-NEXT: xorb %dil, %sil -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i8 %x, %y %n1 = and i8 %n0, 60 @@ -132,18 +136,20 @@ define i16 @in16_constmask(i16 %x, i16 %y) { ; CHECK-NOBMI-LABEL: in16_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $4080, %edi # imm = 0xFF0 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $4080, %eax # imm = 0xFF0 +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in16_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $4080, %edi # imm = 0xFF0 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $4080, %eax # imm = 0xFF0 +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i16 %x, %y %n1 = and i16 %n0, 4080 @@ -154,18 +160,18 @@ define i32 @in32_constmask(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in32_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in32_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -202,18 +208,18 @@ define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -224,18 +230,18 @@ define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorl %edi, %eax +; CHECK-BMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -246,18 +252,18 @@ define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorl %edi, %eax +; CHECK-BMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -272,20 +278,20 @@ define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: in_complex_y0_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y0_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -297,20 +303,20 @@ define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: in_complex_y1_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y1_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -416,18 +422,18 @@ define i32 @n0_badconstmask(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: n0_badconstmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: andl $-16776960, %esi # imm = 0xFF000100 -; CHECK-NOBMI-NEXT: orl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: andl $-16776960, %eax # imm = 0xFF000100 +; CHECK-NOBMI-NEXT: orl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n0_badconstmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: andl $-16776960, %esi # imm = 0xFF000100 -; CHECK-BMI-NEXT: orl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: andl $-16776960, %eax # imm = 0xFF000100 +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %mx = and i32 %x, 16776960 %my = and i32 %y, -16776960 ; instead of -16776961 @@ -438,18 +444,18 @@ define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { ; CHECK-NOBMI-LABEL: n1_thirdvar_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %edx, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n1_thirdvar_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %edx, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $16776960, %eax # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %edx, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 Index: test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -10,18 +10,20 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andb $85, %dil -; CHECK-NOBMI-NEXT: andb $-86, %sil -; CHECK-NOBMI-NEXT: orb %dil, %sil ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andb $85, %dil +; CHECK-NOBMI-NEXT: andb $-86, %al +; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andb $85, %dil -; CHECK-BMI-NEXT: andb $-86, %sil -; CHECK-BMI-NEXT: orb %dil, %sil ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andb $85, %dil +; CHECK-BMI-NEXT: andb $-86, %al +; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 85 %my = and i8 %y, -86 @@ -110,18 +112,20 @@ define i8 @in8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: in8_constmask: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %esi, %eax ; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: andb $85, %dil -; CHECK-NOBMI-NEXT: xorb %dil, %sil -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in8_constmask: ; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movl %esi, %eax ; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: andb $85, %dil -; CHECK-BMI-NEXT: xorb %dil, %sil -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i8 %x, %y %n1 = and i8 %n0, 85 @@ -132,18 +136,20 @@ define i16 @in16_constmask(i16 %x, i16 %y) { ; CHECK-NOBMI-LABEL: in16_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $21845, %edi # imm = 0x5555 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $21845, %eax # imm = 0x5555 +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in16_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $21845, %edi # imm = 0x5555 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $21845, %eax # imm = 0x5555 +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i16 %x, %y %n1 = and i16 %n0, 21845 @@ -154,18 +160,18 @@ define i32 @in32_constmask(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in32_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in32_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 1431655765 @@ -202,18 +208,18 @@ define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 1431655765 @@ -224,18 +230,18 @@ define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 -; CHECK-BMI-NEXT: xorl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorl %edi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 1431655765 @@ -246,18 +252,18 @@ define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 -; CHECK-BMI-NEXT: xorl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorl %edi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 1431655765 @@ -272,20 +278,20 @@ define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: in_complex_y0_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y0_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -297,20 +303,20 @@ define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: in_complex_y1_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y1_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -416,18 +422,18 @@ define i32 @n0_badconstmask(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: n0_badconstmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: andl $-1431655765, %esi # imm = 0xAAAAAAAB -; CHECK-NOBMI-NEXT: orl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: andl $-1431655765, %eax # imm = 0xAAAAAAAB +; CHECK-NOBMI-NEXT: orl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n0_badconstmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-BMI-NEXT: andl $-1431655765, %esi # imm = 0xAAAAAAAB -; CHECK-BMI-NEXT: orl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: andl $-1431655765, %eax # imm = 0xAAAAAAAB +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %mx = and i32 %x, 1431655765 %my = and i32 %y, -1431655765 ; instead of -1431655766 @@ -438,18 +444,18 @@ define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { ; CHECK-NOBMI-LABEL: n1_thirdvar_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-NOBMI-NEXT: xorl %edx, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n1_thirdvar_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; CHECK-BMI-NEXT: xorl %edx, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %edx, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 1431655765 Index: test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -10,18 +10,20 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andb $15, %dil -; CHECK-NOBMI-NEXT: andb $-16, %sil -; CHECK-NOBMI-NEXT: orb %dil, %sil ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andb $15, %dil +; CHECK-NOBMI-NEXT: andb $-16, %al +; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andb $15, %dil -; CHECK-BMI-NEXT: andb $-16, %sil -; CHECK-BMI-NEXT: orb %dil, %sil ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andb $15, %dil +; CHECK-BMI-NEXT: andb $-16, %al +; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 15 %my = and i8 %y, -16 @@ -110,18 +112,20 @@ define i8 @in8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: in8_constmask: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %esi, %eax ; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: andb $15, %dil -; CHECK-NOBMI-NEXT: xorb %dil, %sil -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in8_constmask: ; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movl %esi, %eax ; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: andb $15, %dil -; CHECK-BMI-NEXT: xorb %dil, %sil -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i8 %x, %y %n1 = and i8 %n0, 15 @@ -132,18 +136,20 @@ define i16 @in16_constmask(i16 %x, i16 %y) { ; CHECK-NOBMI-LABEL: in16_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $3855, %edi # imm = 0xF0F -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $3855, %eax # imm = 0xF0F +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in16_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $3855, %edi # imm = 0xF0F -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $3855, %eax # imm = 0xF0F +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i16 %x, %y %n1 = and i16 %n0, 3855 @@ -154,18 +160,18 @@ define i32 @in32_constmask(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in32_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in32_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 252645135 @@ -202,18 +208,18 @@ define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 252645135 @@ -224,18 +230,18 @@ define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: xorl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorl %edi, %eax +; CHECK-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 252645135 @@ -246,18 +252,18 @@ define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: xorl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorl %edi, %eax +; CHECK-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 252645135 @@ -272,20 +278,20 @@ define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: in_complex_y0_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y0_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -297,20 +303,20 @@ define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: in_complex_y1_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y1_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -416,18 +422,18 @@ define i32 @n0_badconstmask(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: n0_badconstmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: andl $-252645135, %esi # imm = 0xF0F0F0F1 -; CHECK-NOBMI-NEXT: orl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: andl $-252645135, %eax # imm = 0xF0F0F0F1 +; CHECK-NOBMI-NEXT: orl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n0_badconstmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: andl $-252645135, %esi # imm = 0xF0F0F0F1 -; CHECK-BMI-NEXT: orl %edi, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: andl $-252645135, %eax # imm = 0xF0F0F0F1 +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %mx = and i32 %x, 252645135 %my = and i32 %y, -252645135 ; instead of -252645136 @@ -438,18 +444,18 @@ define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { ; CHECK-NOBMI-LABEL: n1_thirdvar_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-NOBMI-NEXT: xorl %edx, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n1_thirdvar_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; CHECK-BMI-NEXT: xorl %edx, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %edx, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 252645135 Index: test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -10,18 +10,20 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andb $15, %dil -; CHECK-NOBMI-NEXT: andb $-16, %sil -; CHECK-NOBMI-NEXT: orb %dil, %sil ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andb $15, %dil +; CHECK-NOBMI-NEXT: andb $-16, %al +; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andb $15, %dil -; CHECK-BMI-NEXT: andb $-16, %sil -; CHECK-BMI-NEXT: orb %dil, %sil ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andb $15, %dil +; CHECK-BMI-NEXT: andb $-16, %al +; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 15 %my = and i8 %y, -16 @@ -100,18 +102,20 @@ define i8 @in8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: in8_constmask: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %esi, %eax ; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: andb $15, %dil -; CHECK-NOBMI-NEXT: xorb %dil, %sil -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in8_constmask: ; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movl %esi, %eax ; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: andb $15, %dil -; CHECK-BMI-NEXT: xorb %dil, %sil -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: xorb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i8 %x, %y %n1 = and i8 %n0, 15 Index: test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll @@ -6,20 +6,22 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) { ; CHECK-NOBMI-LABEL: out8: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notb %dl -; CHECK-NOBMI-NEXT: andb %sil, %dl -; CHECK-NOBMI-NEXT: orb %dil, %dl ; CHECK-NOBMI-NEXT: movl %edx, %eax +; CHECK-NOBMI-NEXT: andl %edx, %edi +; CHECK-NOBMI-NEXT: notb %al +; CHECK-NOBMI-NEXT: andb %sil, %al +; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: notb %dl -; CHECK-BMI-NEXT: andb %sil, %dl -; CHECK-BMI-NEXT: orb %dil, %dl ; CHECK-BMI-NEXT: movl %edx, %eax +; CHECK-BMI-NEXT: andl %edx, %edi +; CHECK-BMI-NEXT: notb %al +; CHECK-BMI-NEXT: andb %sil, %al +; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, %mask %notmask = xor i8 %mask, -1 @@ -31,11 +33,12 @@ define i16 @out16(i16 %x, i16 %y, i16 %mask) { ; CHECK-NOBMI-LABEL: out16: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: andl %esi, %edx -; CHECK-NOBMI-NEXT: orl %edi, %edx ; CHECK-NOBMI-NEXT: movl %edx, %eax +; CHECK-NOBMI-NEXT: andl %edx, %edi +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %esi, %eax +; CHECK-NOBMI-NEXT: orl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out16: @@ -55,11 +58,11 @@ define i32 @out32(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: out32: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: andl %esi, %edx -; CHECK-NOBMI-NEXT: orl %edi, %edx ; CHECK-NOBMI-NEXT: movl %edx, %eax +; CHECK-NOBMI-NEXT: andl %edx, %edi +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %esi, %eax +; CHECK-NOBMI-NEXT: orl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out32: @@ -78,11 +81,11 @@ define i64 @out64(i64 %x, i64 %y, i64 %mask) { ; CHECK-NOBMI-LABEL: out64: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andq %rdx, %rdi -; CHECK-NOBMI-NEXT: notq %rdx -; CHECK-NOBMI-NEXT: andq %rsi, %rdx -; CHECK-NOBMI-NEXT: orq %rdi, %rdx ; CHECK-NOBMI-NEXT: movq %rdx, %rax +; CHECK-NOBMI-NEXT: andq %rdx, %rdi +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: andq %rsi, %rax +; CHECK-NOBMI-NEXT: orq %rdi, %rax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out64: @@ -104,10 +107,11 @@ define i8 @in8(i8 %x, i8 %y, i8 %mask) { ; CHECK-NOBMI-LABEL: in8: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in8: @@ -126,10 +130,11 @@ define i16 @in16(i16 %x, i16 %y, i16 %mask) { ; CHECK-NOBMI-LABEL: in16: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in16: @@ -148,10 +153,10 @@ define i32 @in32(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in32: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in32: @@ -169,10 +174,10 @@ define i64 @in64(i64 %x, i64 %y, i64 %mask) { ; CHECK-NOBMI-LABEL: in64: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorq %rsi, %rdi -; CHECK-NOBMI-NEXT: andq %rdx, %rdi -; CHECK-NOBMI-NEXT: xorq %rsi, %rdi ; CHECK-NOBMI-NEXT: movq %rdi, %rax +; CHECK-NOBMI-NEXT: xorq %rsi, %rax +; CHECK-NOBMI-NEXT: andq %rdx, %rax +; CHECK-NOBMI-NEXT: xorq %rsi, %rax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in64: @@ -192,10 +197,10 @@ define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_commutativity_0_0_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_commutativity_0_0_1: @@ -212,10 +217,10 @@ define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_commutativity_0_1_0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_commutativity_0_1_0: @@ -232,10 +237,10 @@ define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_commutativity_0_1_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_commutativity_0_1_1: @@ -252,10 +257,10 @@ define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_commutativity_1_0_0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_commutativity_1_0_0: @@ -272,10 +277,10 @@ define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_commutativity_1_0_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_commutativity_1_0_1: @@ -292,10 +297,10 @@ define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_commutativity_1_1_0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_commutativity_1_1_0: @@ -312,10 +317,10 @@ define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_commutativity_1_1_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %edi, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_commutativity_1_1_1: @@ -335,11 +340,11 @@ define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { ; CHECK-NOBMI-LABEL: in_complex_y0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %ecx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %ecx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y0: @@ -358,11 +363,11 @@ define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { ; CHECK-NOBMI-LABEL: in_complex_y1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %ecx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %ecx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y1: @@ -384,11 +389,11 @@ define i32 @in_complex_m0(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { ; CHECK-NOBMI-LABEL: in_complex_m0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %ecx, %edx -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %ecx, %edx +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_m0: @@ -407,11 +412,11 @@ define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { ; CHECK-NOBMI-LABEL: in_complex_m1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %ecx, %edx -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %ecx, %edx +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_m1: @@ -433,12 +438,12 @@ define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-NOBMI-LABEL: in_complex_y0_m0: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: andl %edx, %esi ; CHECK-NOBMI-NEXT: xorl %r8d, %ecx -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %ecx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %ecx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y0_m0: @@ -459,12 +464,12 @@ define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-NOBMI-LABEL: in_complex_y1_m0: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: andl %edx, %esi ; CHECK-NOBMI-NEXT: xorl %r8d, %ecx -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %ecx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %ecx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y1_m0: @@ -485,12 +490,12 @@ define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-NOBMI-LABEL: in_complex_y0_m1: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: andl %edx, %esi ; CHECK-NOBMI-NEXT: xorl %r8d, %ecx -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %ecx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %ecx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y0_m1: @@ -511,12 +516,12 @@ define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-NOBMI-LABEL: in_complex_y1_m1: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: andl %edx, %esi ; CHECK-NOBMI-NEXT: xorl %r8d, %ecx -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %ecx, %edi -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %ecx, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y1_m1: @@ -540,18 +545,18 @@ define i32 @out_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: out_constant_varx_mone: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: orl %edx, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: notl %edx +; CHECK-NOBMI-NEXT: orl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out_constant_varx_mone: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: notl %edx -; CHECK-BMI-NEXT: orl %edx, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andl %edx, %eax +; CHECK-BMI-NEXT: notl %edx +; CHECK-BMI-NEXT: orl %edx, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %mx = and i32 %mask, %x @@ -562,10 +567,10 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_mone: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: notl %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_varx_mone: @@ -603,11 +608,11 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_mone_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: notl %edx +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: notl %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_varx_mone_invmask: @@ -649,10 +654,10 @@ define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_42: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl $42, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl $42, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl $42, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl $42, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_varx_42: @@ -671,11 +676,11 @@ ; CHECK-NOBMI-LABEL: out_constant_varx_42_invmask: ; CHECK-NOBMI: # %bb.0: ; CHECK-NOBMI-NEXT: movl %edx, %eax -; CHECK-NOBMI-NEXT: notl %eax -; CHECK-NOBMI-NEXT: andl %edi, %eax -; CHECK-NOBMI-NEXT: andl $42, %edx -; CHECK-NOBMI-NEXT: orl %eax, %edx -; CHECK-NOBMI-NEXT: movl %edx, %eax +; CHECK-NOBMI-NEXT: movl %edx, %ecx +; CHECK-NOBMI-NEXT: notl %ecx +; CHECK-NOBMI-NEXT: andl %edi, %ecx +; CHECK-NOBMI-NEXT: andl $42, %eax +; CHECK-NOBMI-NEXT: orl %ecx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out_constant_varx_42_invmask: @@ -694,11 +699,11 @@ define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_42_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: xorl $42, %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl $42, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: notl %edx +; CHECK-NOBMI-NEXT: xorl $42, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: xorl $42, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_varx_42_invmask: @@ -757,18 +762,18 @@ define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: out_constant_mone_vary_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: orl %edx, %esi ; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax +; CHECK-NOBMI-NEXT: notl %edx +; CHECK-NOBMI-NEXT: orl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out_constant_mone_vary_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: notl %edx -; CHECK-BMI-NEXT: orl %edx, %esi ; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andl %edx, %eax +; CHECK-BMI-NEXT: notl %edx +; CHECK-BMI-NEXT: orl %edx, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %mx = and i32 %notmask, -1 @@ -845,20 +850,20 @@ define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: out_constant_42_vary_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: andl %edx, %eax ; CHECK-NOBMI-NEXT: notl %edx ; CHECK-NOBMI-NEXT: andl $42, %edx -; CHECK-NOBMI-NEXT: orl %edx, %esi -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: orl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out_constant_42_vary_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: andl %edx, %eax ; CHECK-BMI-NEXT: notl %edx ; CHECK-BMI-NEXT: andl $42, %edx -; CHECK-BMI-NEXT: orl %edx, %esi -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: orl %edx, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %mx = and i32 %notmask, 42 @@ -879,11 +884,11 @@ ; ; CHECK-BMI-LABEL: in_constant_42_vary_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: notl %edx -; CHECK-BMI-NEXT: andl $42, %edx -; CHECK-BMI-NEXT: orl %esi, %edx ; CHECK-BMI-NEXT: movl %edx, %eax +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: notl %eax +; CHECK-BMI-NEXT: andl $42, %eax +; CHECK-BMI-NEXT: orl %esi, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %n0 = xor i32 42, %y ; %x @@ -982,11 +987,11 @@ define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) { ; CHECK-NOBMI-LABEL: n0_badmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %ecx -; CHECK-NOBMI-NEXT: andl %esi, %ecx -; CHECK-NOBMI-NEXT: orl %edi, %ecx ; CHECK-NOBMI-NEXT: movl %ecx, %eax +; CHECK-NOBMI-NEXT: andl %edx, %edi +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %esi, %eax +; CHECK-NOBMI-NEXT: orl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n0_badmask: @@ -1004,20 +1009,20 @@ define i32 @n0_badxor(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: n0_badxor: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: xorl $1, %edx -; CHECK-NOBMI-NEXT: andl %esi, %edx -; CHECK-NOBMI-NEXT: orl %edi, %edx ; CHECK-NOBMI-NEXT: movl %edx, %eax +; CHECK-NOBMI-NEXT: andl %edx, %edi +; CHECK-NOBMI-NEXT: xorl $1, %eax +; CHECK-NOBMI-NEXT: andl %esi, %eax +; CHECK-NOBMI-NEXT: orl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n0_badxor: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: xorl $1, %edx -; CHECK-BMI-NEXT: andl %esi, %edx -; CHECK-BMI-NEXT: orl %edi, %edx ; CHECK-BMI-NEXT: movl %edx, %eax +; CHECK-BMI-NEXT: andl %edx, %edi +; CHECK-BMI-NEXT: xorl $1, %eax +; CHECK-BMI-NEXT: andl %esi, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %mx = and i32 %x, %mask %notmask = xor i32 %mask, 1 ; instead of -1 @@ -1028,18 +1033,18 @@ define i32 @n1_thirdvar(i32 %x, i32 %y, i32 %z, i32 %mask) { ; CHECK-NOBMI-LABEL: n1_thirdvar: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: andl %ecx, %edi -; CHECK-NOBMI-NEXT: xorl %edx, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: andl %ecx, %eax +; CHECK-NOBMI-NEXT: xorl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: n1_thirdvar: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: andl %ecx, %edi -; CHECK-BMI-NEXT: xorl %edx, %edi ; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: andl %ecx, %eax +; CHECK-BMI-NEXT: xorl %edx, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask Index: test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll +++ test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll @@ -10,13 +10,13 @@ define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_varx_mone: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan] ; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andps (%rsi), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_varx_mone: @@ -49,11 +49,11 @@ define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_varx_mone: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0 ; CHECK-SSE1-NEXT: andnps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: xorps {{.*}}(%rip), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_varx_mone: @@ -84,12 +84,12 @@ define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: orps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask: @@ -120,6 +120,7 @@ define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan] ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm2 @@ -127,7 +128,6 @@ ; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0 ; CHECK-SSE1-NEXT: xorps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask: @@ -161,13 +161,13 @@ define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_varx_42: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_varx_42: @@ -198,13 +198,13 @@ define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_varx_42: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_varx_42: @@ -235,13 +235,13 @@ define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_varx_42_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps {{.*}}(%rip), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_varx_42_invmask: @@ -273,13 +273,13 @@ define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_varx_42_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps {{.*}}(%rip), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_varx_42_invmask: @@ -310,12 +310,12 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_mone_vary: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1 ; CHECK-SSE1-NEXT: orps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_mone_vary: @@ -345,12 +345,12 @@ define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_mone_vary: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1 ; CHECK-SSE1-NEXT: orps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_mone_vary: @@ -380,13 +380,13 @@ define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan] ; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask: @@ -420,13 +420,13 @@ define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan] ; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask: @@ -459,13 +459,13 @@ define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_42_vary: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44] ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_42_vary: @@ -496,13 +496,13 @@ define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_42_vary: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1 ; CHECK-SSE1-NEXT: andps {{.*}}(%rip), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_42_vary: @@ -533,13 +533,13 @@ define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: out_constant_42_vary_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm1 ; CHECK-SSE1-NEXT: andps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_constant_42_vary_invmask: @@ -571,13 +571,13 @@ define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) { ; CHECK-SSE1-LABEL: in_constant_42_vary_invmask: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps (%rdx), %xmm1 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps {{.*}}(%rip), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_42_vary_invmask: Index: test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll +++ test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll @@ -16,11 +16,12 @@ define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { ; CHECK-LABEL: out_v1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andl %edx, %edi -; CHECK-NEXT: notb %dl -; CHECK-NEXT: andb %sil, %dl -; CHECK-NEXT: orb %dil, %dl ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: andl %edx, %edi +; CHECK-NEXT: notb %al +; CHECK-NEXT: andb %sil, %al +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %mx = and <1 x i8> %x, %mask %notmask = xor <1 x i8> %mask, @@ -36,29 +37,31 @@ define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v2i8: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movl %r8d, %eax ; CHECK-BASELINE-NEXT: andl %r8d, %edi ; CHECK-BASELINE-NEXT: andl %r9d, %esi -; CHECK-BASELINE-NEXT: notb %r8b +; CHECK-BASELINE-NEXT: notb %al ; CHECK-BASELINE-NEXT: notb %r9b ; CHECK-BASELINE-NEXT: andb %cl, %r9b -; CHECK-BASELINE-NEXT: andb %dl, %r8b -; CHECK-BASELINE-NEXT: orb %dil, %r8b +; CHECK-BASELINE-NEXT: andb %dl, %al +; CHECK-BASELINE-NEXT: orb %dil, %al ; CHECK-BASELINE-NEXT: orb %sil, %r9b -; CHECK-BASELINE-NEXT: movl %r8d, %eax +; CHECK-BASELINE-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BASELINE-NEXT: movl %r9d, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v2i8: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movl %r8d, %eax ; CHECK-SSE1-NEXT: andl %r8d, %edi ; CHECK-SSE1-NEXT: andl %r9d, %esi -; CHECK-SSE1-NEXT: notb %r8b +; CHECK-SSE1-NEXT: notb %al ; CHECK-SSE1-NEXT: notb %r9b ; CHECK-SSE1-NEXT: andb %cl, %r9b -; CHECK-SSE1-NEXT: andb %dl, %r8b -; CHECK-SSE1-NEXT: orb %dil, %r8b +; CHECK-SSE1-NEXT: andb %dl, %al +; CHECK-SSE1-NEXT: orb %dil, %al ; CHECK-SSE1-NEXT: orb %sil, %r9b -; CHECK-SSE1-NEXT: movl %r8d, %eax +; CHECK-SSE1-NEXT: # kill: def $al killed $al killed $eax ; CHECK-SSE1-NEXT: movl %r9d, %edx ; CHECK-SSE1-NEXT: retq ; @@ -87,11 +90,12 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { ; CHECK-LABEL: out_v1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: andl %edx, %edi -; CHECK-NEXT: notl %edx -; CHECK-NEXT: andl %esi, %edx -; CHECK-NEXT: orl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: andl %edx, %edi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %mx = and <1 x i16> %x, %mask %notmask = xor <1 x i16> %mask, @@ -108,62 +112,62 @@ ; CHECK-BASELINE-LABEL: out_v4i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: andb %bl, %r8b -; CHECK-BASELINE-NEXT: andb %al, %cl -; CHECK-BASELINE-NEXT: andb %r11b, %dl -; CHECK-BASELINE-NEXT: andb %r10b, %sil +; CHECK-BASELINE-NEXT: andb %r11b, %cl +; CHECK-BASELINE-NEXT: andb %r10b, %dl +; CHECK-BASELINE-NEXT: andb %dil, %sil +; CHECK-BASELINE-NEXT: notb %r10b ; CHECK-BASELINE-NEXT: notb %r11b -; CHECK-BASELINE-NEXT: notb %al ; CHECK-BASELINE-NEXT: notb %bl -; CHECK-BASELINE-NEXT: notb %r10b -; CHECK-BASELINE-NEXT: andb %r9b, %r10b -; CHECK-BASELINE-NEXT: orb %sil, %r10b +; CHECK-BASELINE-NEXT: notb %dil +; CHECK-BASELINE-NEXT: andb %r9b, %dil +; CHECK-BASELINE-NEXT: orb %sil, %dil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: orb %r8b, %bl -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: orb %cl, %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b -; CHECK-BASELINE-NEXT: orb %dl, %r11b -; CHECK-BASELINE-NEXT: movb %bl, 3(%rdi) -; CHECK-BASELINE-NEXT: movb %al, 2(%rdi) -; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) -; CHECK-BASELINE-NEXT: movb %r10b, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: orb %cl, %r11b +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: orb %dl, %r10b +; CHECK-BASELINE-NEXT: movb %bl, 3(%rax) +; CHECK-BASELINE-NEXT: movb %r11b, 2(%rax) +; CHECK-BASELINE-NEXT: movb %r10b, 1(%rax) +; CHECK-BASELINE-NEXT: movb %dil, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbx +; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: andb %bl, %r8b -; CHECK-SSE1-NEXT: andb %al, %cl -; CHECK-SSE1-NEXT: andb %r11b, %dl -; CHECK-SSE1-NEXT: andb %r10b, %sil +; CHECK-SSE1-NEXT: andb %r11b, %cl +; CHECK-SSE1-NEXT: andb %r10b, %dl +; CHECK-SSE1-NEXT: andb %dil, %sil +; CHECK-SSE1-NEXT: notb %r10b ; CHECK-SSE1-NEXT: notb %r11b -; CHECK-SSE1-NEXT: notb %al ; CHECK-SSE1-NEXT: notb %bl -; CHECK-SSE1-NEXT: notb %r10b -; CHECK-SSE1-NEXT: andb %r9b, %r10b -; CHECK-SSE1-NEXT: orb %sil, %r10b +; CHECK-SSE1-NEXT: notb %dil +; CHECK-SSE1-NEXT: andb %r9b, %dil +; CHECK-SSE1-NEXT: orb %sil, %dil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: orb %r8b, %bl -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: orb %cl, %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b -; CHECK-SSE1-NEXT: orb %dl, %r11b -; CHECK-SSE1-NEXT: movb %bl, 3(%rdi) -; CHECK-SSE1-NEXT: movb %al, 2(%rdi) -; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) -; CHECK-SSE1-NEXT: movb %r10b, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: orb %cl, %r11b +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: orb %dl, %r10b +; CHECK-SSE1-NEXT: movb %bl, 3(%rax) +; CHECK-SSE1-NEXT: movb %r11b, 2(%rax) +; CHECK-SSE1-NEXT: movb %r10b, 1(%rax) +; CHECK-SSE1-NEXT: movb %dil, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: retq ; @@ -192,52 +196,52 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v4i8_undef: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: andb %al, %r8b -; CHECK-BASELINE-NEXT: andb %r11b, %dl -; CHECK-BASELINE-NEXT: andb %r10b, %sil +; CHECK-BASELINE-NEXT: andb %r11b, %r8b +; CHECK-BASELINE-NEXT: andb %r10b, %dl +; CHECK-BASELINE-NEXT: andb %dil, %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: notb %r11b -; CHECK-BASELINE-NEXT: notb %al ; CHECK-BASELINE-NEXT: notb %r10b -; CHECK-BASELINE-NEXT: andb %r9b, %r10b -; CHECK-BASELINE-NEXT: orb %sil, %r10b -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: orb %r8b, %al +; CHECK-BASELINE-NEXT: notb %r11b +; CHECK-BASELINE-NEXT: notb %dil +; CHECK-BASELINE-NEXT: andb %r9b, %dil +; CHECK-BASELINE-NEXT: orb %sil, %dil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b -; CHECK-BASELINE-NEXT: orb %dl, %r11b -; CHECK-BASELINE-NEXT: movb %cl, 2(%rdi) -; CHECK-BASELINE-NEXT: movb %al, 3(%rdi) -; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) -; CHECK-BASELINE-NEXT: movb %r10b, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: orb %r8b, %r11b +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: orb %dl, %r10b +; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) +; CHECK-BASELINE-NEXT: movb %r11b, 3(%rax) +; CHECK-BASELINE-NEXT: movb %r10b, 1(%rax) +; CHECK-BASELINE-NEXT: movb %dil, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i8_undef: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: andb %al, %r8b -; CHECK-SSE1-NEXT: andb %r11b, %dl -; CHECK-SSE1-NEXT: andb %r10b, %sil +; CHECK-SSE1-NEXT: andb %r11b, %r8b +; CHECK-SSE1-NEXT: andb %r10b, %dl +; CHECK-SSE1-NEXT: andb %dil, %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: notb %r11b -; CHECK-SSE1-NEXT: notb %al ; CHECK-SSE1-NEXT: notb %r10b -; CHECK-SSE1-NEXT: andb %r9b, %r10b -; CHECK-SSE1-NEXT: orb %sil, %r10b -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: orb %r8b, %al +; CHECK-SSE1-NEXT: notb %r11b +; CHECK-SSE1-NEXT: notb %dil +; CHECK-SSE1-NEXT: andb %r9b, %dil +; CHECK-SSE1-NEXT: orb %sil, %dil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b -; CHECK-SSE1-NEXT: orb %dl, %r11b -; CHECK-SSE1-NEXT: movb %cl, 2(%rdi) -; CHECK-SSE1-NEXT: movb %al, 3(%rdi) -; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) -; CHECK-SSE1-NEXT: movb %r10b, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: orb %r8b, %r11b +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: orb %dl, %r10b +; CHECK-SSE1-NEXT: movb %cl, 2(%rax) +; CHECK-SSE1-NEXT: movb %r11b, 3(%rax) +; CHECK-SSE1-NEXT: movb %r10b, 1(%rax) +; CHECK-SSE1-NEXT: movb %dil, (%rax) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v4i8_undef: @@ -265,29 +269,31 @@ define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v2i16: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movl %r8d, %eax ; CHECK-BASELINE-NEXT: andl %r9d, %esi ; CHECK-BASELINE-NEXT: andl %r8d, %edi -; CHECK-BASELINE-NEXT: notl %r8d +; CHECK-BASELINE-NEXT: notl %eax ; CHECK-BASELINE-NEXT: notl %r9d ; CHECK-BASELINE-NEXT: andl %ecx, %r9d ; CHECK-BASELINE-NEXT: orl %esi, %r9d -; CHECK-BASELINE-NEXT: andl %edx, %r8d -; CHECK-BASELINE-NEXT: orl %edi, %r8d -; CHECK-BASELINE-NEXT: movl %r8d, %eax +; CHECK-BASELINE-NEXT: andl %edx, %eax +; CHECK-BASELINE-NEXT: orl %edi, %eax +; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BASELINE-NEXT: movl %r9d, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v2i16: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movl %r8d, %eax ; CHECK-SSE1-NEXT: andl %r9d, %esi ; CHECK-SSE1-NEXT: andl %r8d, %edi -; CHECK-SSE1-NEXT: notl %r8d +; CHECK-SSE1-NEXT: notl %eax ; CHECK-SSE1-NEXT: notl %r9d ; CHECK-SSE1-NEXT: andl %ecx, %r9d ; CHECK-SSE1-NEXT: orl %esi, %r9d -; CHECK-SSE1-NEXT: andl %edx, %r8d -; CHECK-SSE1-NEXT: orl %edi, %r8d -; CHECK-SSE1-NEXT: movl %r8d, %eax +; CHECK-SSE1-NEXT: andl %edx, %eax +; CHECK-SSE1-NEXT: orl %edi, %eax +; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-SSE1-NEXT: movl %r9d, %edx ; CHECK-SSE1-NEXT: retq ; @@ -316,11 +322,11 @@ define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { ; CHECK-LABEL: out_v1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andl %edx, %edi -; CHECK-NEXT: notl %edx -; CHECK-NEXT: andl %esi, %edx -; CHECK-NEXT: orl %edi, %edx ; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: andl %edx, %edi +; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: retq %mx = and <1 x i32> %x, %mask %notmask = xor <1 x i32> %mask, @@ -342,60 +348,60 @@ ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: andb %al, %r9b -; CHECK-BASELINE-NEXT: andb %bl, %r8b -; CHECK-BASELINE-NEXT: andb %r14b, %cl +; CHECK-BASELINE-NEXT: andb %bl, %r9b +; CHECK-BASELINE-NEXT: andb %r15b, %r8b +; CHECK-BASELINE-NEXT: andb %bpl, %cl ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: andb %r11b, %dl ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: andb %r10b, %sil +; CHECK-BASELINE-NEXT: andb %dil, %sil ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: andb %r12b, %r13b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: andb %r15b, %cl +; CHECK-BASELINE-NEXT: andb %r14b, %cl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl -; CHECK-BASELINE-NEXT: andb %bpl, %dl -; CHECK-BASELINE-NEXT: notb %r10b +; CHECK-BASELINE-NEXT: andb %r10b, %dl +; CHECK-BASELINE-NEXT: notb %dil ; CHECK-BASELINE-NEXT: notb %r11b -; CHECK-BASELINE-NEXT: notb %r14b -; CHECK-BASELINE-NEXT: notb %bl -; CHECK-BASELINE-NEXT: notb %al ; CHECK-BASELINE-NEXT: notb %bpl ; CHECK-BASELINE-NEXT: notb %r15b +; CHECK-BASELINE-NEXT: notb %bl +; CHECK-BASELINE-NEXT: notb %r10b +; CHECK-BASELINE-NEXT: notb %r14b ; CHECK-BASELINE-NEXT: notb %r12b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b ; CHECK-BASELINE-NEXT: orb %r13b, %r12b +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b +; CHECK-BASELINE-NEXT: orb %cl, %r14b +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: orb %dl, %r10b +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl +; CHECK-BASELINE-NEXT: orb %r9b, %bl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b -; CHECK-BASELINE-NEXT: orb %cl, %r15b +; CHECK-BASELINE-NEXT: orb %r8b, %r15b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl -; CHECK-BASELINE-NEXT: orb %dl, %bpl -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: orb %r9b, %al -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl -; CHECK-BASELINE-NEXT: orb %r8b, %bl -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b -; CHECK-BASELINE-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r14b # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %bpl # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b -; CHECK-BASELINE-NEXT: orb %sil, %r10b -; CHECK-BASELINE-NEXT: movb %r12b, 7(%rdi) -; CHECK-BASELINE-NEXT: movb %r15b, 6(%rdi) -; CHECK-BASELINE-NEXT: movb %bpl, 5(%rdi) -; CHECK-BASELINE-NEXT: movb %al, 4(%rdi) -; CHECK-BASELINE-NEXT: movb %bl, 3(%rdi) -; CHECK-BASELINE-NEXT: movb %r14b, 2(%rdi) -; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) -; CHECK-BASELINE-NEXT: movb %r10b, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: orb %sil, %dil +; CHECK-BASELINE-NEXT: movb %r12b, 7(%rax) +; CHECK-BASELINE-NEXT: movb %r14b, 6(%rax) +; CHECK-BASELINE-NEXT: movb %r10b, 5(%rax) +; CHECK-BASELINE-NEXT: movb %bl, 4(%rax) +; CHECK-BASELINE-NEXT: movb %r15b, 3(%rax) +; CHECK-BASELINE-NEXT: movb %bpl, 2(%rax) +; CHECK-BASELINE-NEXT: movb %r11b, 1(%rax) +; CHECK-BASELINE-NEXT: movb %dil, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 @@ -412,60 +418,60 @@ ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: andb %al, %r9b -; CHECK-SSE1-NEXT: andb %bl, %r8b -; CHECK-SSE1-NEXT: andb %r14b, %cl +; CHECK-SSE1-NEXT: andb %bl, %r9b +; CHECK-SSE1-NEXT: andb %r15b, %r8b +; CHECK-SSE1-NEXT: andb %bpl, %cl ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: andb %r11b, %dl ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: andb %r10b, %sil +; CHECK-SSE1-NEXT: andb %dil, %sil ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: andb %r12b, %r13b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: andb %r15b, %cl +; CHECK-SSE1-NEXT: andb %r14b, %cl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl -; CHECK-SSE1-NEXT: andb %bpl, %dl -; CHECK-SSE1-NEXT: notb %r10b +; CHECK-SSE1-NEXT: andb %r10b, %dl +; CHECK-SSE1-NEXT: notb %dil ; CHECK-SSE1-NEXT: notb %r11b -; CHECK-SSE1-NEXT: notb %r14b -; CHECK-SSE1-NEXT: notb %bl -; CHECK-SSE1-NEXT: notb %al ; CHECK-SSE1-NEXT: notb %bpl ; CHECK-SSE1-NEXT: notb %r15b +; CHECK-SSE1-NEXT: notb %bl +; CHECK-SSE1-NEXT: notb %r10b +; CHECK-SSE1-NEXT: notb %r14b ; CHECK-SSE1-NEXT: notb %r12b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b ; CHECK-SSE1-NEXT: orb %r13b, %r12b +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b +; CHECK-SSE1-NEXT: orb %cl, %r14b +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: orb %dl, %r10b +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl +; CHECK-SSE1-NEXT: orb %r9b, %bl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b -; CHECK-SSE1-NEXT: orb %cl, %r15b +; CHECK-SSE1-NEXT: orb %r8b, %r15b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl -; CHECK-SSE1-NEXT: orb %dl, %bpl -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: orb %r9b, %al -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl -; CHECK-SSE1-NEXT: orb %r8b, %bl -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b -; CHECK-SSE1-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r14b # 1-byte Folded Reload +; CHECK-SSE1-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %bpl # 1-byte Folded Reload ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b -; CHECK-SSE1-NEXT: orb %sil, %r10b -; CHECK-SSE1-NEXT: movb %r12b, 7(%rdi) -; CHECK-SSE1-NEXT: movb %r15b, 6(%rdi) -; CHECK-SSE1-NEXT: movb %bpl, 5(%rdi) -; CHECK-SSE1-NEXT: movb %al, 4(%rdi) -; CHECK-SSE1-NEXT: movb %bl, 3(%rdi) -; CHECK-SSE1-NEXT: movb %r14b, 2(%rdi) -; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) -; CHECK-SSE1-NEXT: movb %r10b, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: orb %sil, %dil +; CHECK-SSE1-NEXT: movb %r12b, 7(%rax) +; CHECK-SSE1-NEXT: movb %r14b, 6(%rax) +; CHECK-SSE1-NEXT: movb %r10b, 5(%rax) +; CHECK-SSE1-NEXT: movb %bl, 4(%rax) +; CHECK-SSE1-NEXT: movb %r15b, 3(%rax) +; CHECK-SSE1-NEXT: movb %bpl, 2(%rax) +; CHECK-SSE1-NEXT: movb %r11b, 1(%rax) +; CHECK-SSE1-NEXT: movb %dil, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 @@ -500,62 +506,62 @@ ; CHECK-BASELINE-LABEL: out_v4i16: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d -; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx ; CHECK-BASELINE-NEXT: andl %ebx, %esi -; CHECK-BASELINE-NEXT: andl %eax, %r8d +; CHECK-BASELINE-NEXT: andl %edi, %r8d ; CHECK-BASELINE-NEXT: andl %r11d, %ecx ; CHECK-BASELINE-NEXT: andl %r10d, %edx ; CHECK-BASELINE-NEXT: notl %r10d ; CHECK-BASELINE-NEXT: notl %r11d -; CHECK-BASELINE-NEXT: notl %eax +; CHECK-BASELINE-NEXT: notl %edi ; CHECK-BASELINE-NEXT: notl %ebx ; CHECK-BASELINE-NEXT: andl %r9d, %ebx ; CHECK-BASELINE-NEXT: orl %esi, %ebx -; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %ax -; CHECK-BASELINE-NEXT: orl %r8d, %eax +; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %di +; CHECK-BASELINE-NEXT: orl %r8d, %edi ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w ; CHECK-BASELINE-NEXT: orl %ecx, %r11d ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w ; CHECK-BASELINE-NEXT: orl %edx, %r10d -; CHECK-BASELINE-NEXT: movw %bx, (%rdi) -; CHECK-BASELINE-NEXT: movw %ax, 6(%rdi) -; CHECK-BASELINE-NEXT: movw %r11w, 4(%rdi) -; CHECK-BASELINE-NEXT: movw %r10w, 2(%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movw %bx, (%rax) +; CHECK-BASELINE-NEXT: movw %di, 6(%rax) +; CHECK-BASELINE-NEXT: movw %r11w, 4(%rax) +; CHECK-BASELINE-NEXT: movw %r10w, 2(%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i16: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbx +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d -; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx ; CHECK-SSE1-NEXT: andl %ebx, %esi -; CHECK-SSE1-NEXT: andl %eax, %r8d +; CHECK-SSE1-NEXT: andl %edi, %r8d ; CHECK-SSE1-NEXT: andl %r11d, %ecx ; CHECK-SSE1-NEXT: andl %r10d, %edx ; CHECK-SSE1-NEXT: notl %r10d ; CHECK-SSE1-NEXT: notl %r11d -; CHECK-SSE1-NEXT: notl %eax +; CHECK-SSE1-NEXT: notl %edi ; CHECK-SSE1-NEXT: notl %ebx ; CHECK-SSE1-NEXT: andl %r9d, %ebx ; CHECK-SSE1-NEXT: orl %esi, %ebx -; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %ax -; CHECK-SSE1-NEXT: orl %r8d, %eax +; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %di +; CHECK-SSE1-NEXT: orl %r8d, %edi ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w ; CHECK-SSE1-NEXT: orl %ecx, %r11d ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w ; CHECK-SSE1-NEXT: orl %edx, %r10d -; CHECK-SSE1-NEXT: movw %bx, (%rdi) -; CHECK-SSE1-NEXT: movw %ax, 6(%rdi) -; CHECK-SSE1-NEXT: movw %r11w, 4(%rdi) -; CHECK-SSE1-NEXT: movw %r10w, 2(%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movw %bx, (%rax) +; CHECK-SSE1-NEXT: movw %di, 6(%rax) +; CHECK-SSE1-NEXT: movw %r11w, 4(%rax) +; CHECK-SSE1-NEXT: movw %r10w, 2(%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: retq ; @@ -584,52 +590,52 @@ define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v4i16_undef: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d -; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: andl %eax, %esi +; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-BASELINE-NEXT: andl %edi, %esi ; CHECK-BASELINE-NEXT: andl %r11d, %r8d ; CHECK-BASELINE-NEXT: andl %r10d, %edx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx ; CHECK-BASELINE-NEXT: notl %r10d ; CHECK-BASELINE-NEXT: notl %r11d -; CHECK-BASELINE-NEXT: notl %eax -; CHECK-BASELINE-NEXT: andl %r9d, %eax -; CHECK-BASELINE-NEXT: orl %esi, %eax +; CHECK-BASELINE-NEXT: notl %edi +; CHECK-BASELINE-NEXT: andl %r9d, %edi +; CHECK-BASELINE-NEXT: orl %esi, %edi ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w ; CHECK-BASELINE-NEXT: orl %r8d, %r11d ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w ; CHECK-BASELINE-NEXT: orl %edx, %r10d -; CHECK-BASELINE-NEXT: movw %cx, 4(%rdi) -; CHECK-BASELINE-NEXT: movw %ax, (%rdi) -; CHECK-BASELINE-NEXT: movw %r11w, 6(%rdi) -; CHECK-BASELINE-NEXT: movw %r10w, 2(%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) +; CHECK-BASELINE-NEXT: movw %di, (%rax) +; CHECK-BASELINE-NEXT: movw %r11w, 6(%rax) +; CHECK-BASELINE-NEXT: movw %r10w, 2(%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i16_undef: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d -; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: andl %eax, %esi +; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-SSE1-NEXT: andl %edi, %esi ; CHECK-SSE1-NEXT: andl %r11d, %r8d ; CHECK-SSE1-NEXT: andl %r10d, %edx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx ; CHECK-SSE1-NEXT: notl %r10d ; CHECK-SSE1-NEXT: notl %r11d -; CHECK-SSE1-NEXT: notl %eax -; CHECK-SSE1-NEXT: andl %r9d, %eax -; CHECK-SSE1-NEXT: orl %esi, %eax +; CHECK-SSE1-NEXT: notl %edi +; CHECK-SSE1-NEXT: andl %r9d, %edi +; CHECK-SSE1-NEXT: orl %esi, %edi ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w ; CHECK-SSE1-NEXT: orl %r8d, %r11d ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w ; CHECK-SSE1-NEXT: orl %edx, %r10d -; CHECK-SSE1-NEXT: movw %cx, 4(%rdi) -; CHECK-SSE1-NEXT: movw %ax, (%rdi) -; CHECK-SSE1-NEXT: movw %r11w, 6(%rdi) -; CHECK-SSE1-NEXT: movw %r10w, 2(%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movw %cx, 4(%rax) +; CHECK-SSE1-NEXT: movw %di, (%rax) +; CHECK-SSE1-NEXT: movw %r11w, 6(%rax) +; CHECK-SSE1-NEXT: movw %r10w, 2(%rax) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v4i16_undef: @@ -657,29 +663,29 @@ define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v2i32: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movl %r8d, %eax ; CHECK-BASELINE-NEXT: andl %r9d, %esi ; CHECK-BASELINE-NEXT: andl %r8d, %edi -; CHECK-BASELINE-NEXT: notl %r8d +; CHECK-BASELINE-NEXT: notl %eax ; CHECK-BASELINE-NEXT: notl %r9d ; CHECK-BASELINE-NEXT: andl %ecx, %r9d ; CHECK-BASELINE-NEXT: orl %esi, %r9d -; CHECK-BASELINE-NEXT: andl %edx, %r8d -; CHECK-BASELINE-NEXT: orl %edi, %r8d -; CHECK-BASELINE-NEXT: movl %r8d, %eax +; CHECK-BASELINE-NEXT: andl %edx, %eax +; CHECK-BASELINE-NEXT: orl %edi, %eax ; CHECK-BASELINE-NEXT: movl %r9d, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v2i32: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movl %r8d, %eax ; CHECK-SSE1-NEXT: andl %r9d, %esi ; CHECK-SSE1-NEXT: andl %r8d, %edi -; CHECK-SSE1-NEXT: notl %r8d +; CHECK-SSE1-NEXT: notl %eax ; CHECK-SSE1-NEXT: notl %r9d ; CHECK-SSE1-NEXT: andl %ecx, %r9d ; CHECK-SSE1-NEXT: orl %esi, %r9d -; CHECK-SSE1-NEXT: andl %edx, %r8d -; CHECK-SSE1-NEXT: orl %edi, %r8d -; CHECK-SSE1-NEXT: movl %r8d, %eax +; CHECK-SSE1-NEXT: andl %edx, %eax +; CHECK-SSE1-NEXT: orl %edi, %eax ; CHECK-SSE1-NEXT: movl %r9d, %edx ; CHECK-SSE1-NEXT: retq ; @@ -708,11 +714,11 @@ define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { ; CHECK-LABEL: out_v1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: andq %rdx, %rdi -; CHECK-NEXT: notq %rdx -; CHECK-NEXT: andq %rsi, %rdx -; CHECK-NEXT: orq %rdi, %rdx ; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: andq %rdx, %rdi +; CHECK-NEXT: notq %rax +; CHECK-NEXT: andq %rsi, %rax +; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: retq %mx = and <1 x i64> %x, %mask %notmask = xor <1 x i64> %mask, @@ -737,6 +743,8 @@ ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl @@ -747,12 +755,6 @@ ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil -; CHECK-BASELINE-NEXT: andb %al, %sil -; CHECK-BASELINE-NEXT: notb %al -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: orb %sil, %al ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: andb %cl, %sil ; CHECK-BASELINE-NEXT: notb %cl @@ -803,51 +805,55 @@ ; CHECK-BASELINE-NEXT: notb %r10b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: orb %sil, %r10b -; CHECK-BASELINE-NEXT: movb %al, 15(%rdi) -; CHECK-BASELINE-NEXT: movb %cl, 14(%rdi) -; CHECK-BASELINE-NEXT: movb %dl, 13(%rdi) -; CHECK-BASELINE-NEXT: movb %bl, 12(%rdi) -; CHECK-BASELINE-NEXT: movb %r13b, 11(%rdi) -; CHECK-BASELINE-NEXT: movb %r12b, 10(%rdi) -; CHECK-BASELINE-NEXT: movb %r15b, 9(%rdi) -; CHECK-BASELINE-NEXT: movb %r14b, 8(%rdi) -; CHECK-BASELINE-NEXT: movb %bpl, 7(%rdi) -; CHECK-BASELINE-NEXT: movb %r11b, 6(%rdi) -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: andb %al, %r9b -; CHECK-BASELINE-NEXT: notb %al -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: orb %r9b, %al -; CHECK-BASELINE-NEXT: movb %r10b, 5(%rdi) +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil +; CHECK-BASELINE-NEXT: andb %dil, %sil +; CHECK-BASELINE-NEXT: notb %dil +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: orb %sil, %dil +; CHECK-BASELINE-NEXT: movb %cl, 15(%rax) +; CHECK-BASELINE-NEXT: movb %dl, 14(%rax) +; CHECK-BASELINE-NEXT: movb %bl, 13(%rax) +; CHECK-BASELINE-NEXT: movb %r13b, 12(%rax) +; CHECK-BASELINE-NEXT: movb %r12b, 11(%rax) +; CHECK-BASELINE-NEXT: movb %r15b, 10(%rax) +; CHECK-BASELINE-NEXT: movb %r14b, 9(%rax) +; CHECK-BASELINE-NEXT: movb %bpl, 8(%rax) +; CHECK-BASELINE-NEXT: movb %r11b, 7(%rax) +; CHECK-BASELINE-NEXT: movb %r10b, 6(%rax) ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: andb %cl, %r8b +; CHECK-BASELINE-NEXT: andb %cl, %r9b ; CHECK-BASELINE-NEXT: notb %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: orb %r8b, %cl -; CHECK-BASELINE-NEXT: movb %al, 4(%rdi) -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-BASELINE-NEXT: andb %al, %dl -; CHECK-BASELINE-NEXT: notb %al -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: orb %dl, %al -; CHECK-BASELINE-NEXT: movb %cl, 3(%rdi) +; CHECK-BASELINE-NEXT: orb %r9b, %cl +; CHECK-BASELINE-NEXT: movb %dil, 5(%rax) +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-BASELINE-NEXT: andb %dl, %r8b +; CHECK-BASELINE-NEXT: notb %dl +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl +; CHECK-BASELINE-NEXT: orb %r8b, %dl +; CHECK-BASELINE-NEXT: movb %cl, 4(%rax) ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-BASELINE-NEXT: andb %cl, %dl +; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; CHECK-BASELINE-NEXT: andb %cl, %sil ; CHECK-BASELINE-NEXT: notb %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: orb %dl, %cl -; CHECK-BASELINE-NEXT: movb %al, 2(%rdi) -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-BASELINE-NEXT: andb %al, %dl -; CHECK-BASELINE-NEXT: notb %al -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: orb %dl, %al -; CHECK-BASELINE-NEXT: movb %cl, 1(%rdi) -; CHECK-BASELINE-NEXT: movb %al, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: orb %sil, %cl +; CHECK-BASELINE-NEXT: movb %dl, 3(%rax) +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; CHECK-BASELINE-NEXT: andb %dl, %sil +; CHECK-BASELINE-NEXT: notb %dl +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl +; CHECK-BASELINE-NEXT: orb %sil, %dl +; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; CHECK-BASELINE-NEXT: andb %cl, %sil +; CHECK-BASELINE-NEXT: notb %cl +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl +; CHECK-BASELINE-NEXT: orb %sil, %cl +; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) +; CHECK-BASELINE-NEXT: movb %cl, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 @@ -867,6 +873,8 @@ ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl @@ -877,12 +885,6 @@ ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil -; CHECK-SSE1-NEXT: andb %al, %sil -; CHECK-SSE1-NEXT: notb %al -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: orb %sil, %al ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: andb %cl, %sil ; CHECK-SSE1-NEXT: notb %cl @@ -933,51 +935,55 @@ ; CHECK-SSE1-NEXT: notb %r10b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: orb %sil, %r10b -; CHECK-SSE1-NEXT: movb %al, 15(%rdi) -; CHECK-SSE1-NEXT: movb %cl, 14(%rdi) -; CHECK-SSE1-NEXT: movb %dl, 13(%rdi) -; CHECK-SSE1-NEXT: movb %bl, 12(%rdi) -; CHECK-SSE1-NEXT: movb %r13b, 11(%rdi) -; CHECK-SSE1-NEXT: movb %r12b, 10(%rdi) -; CHECK-SSE1-NEXT: movb %r15b, 9(%rdi) -; CHECK-SSE1-NEXT: movb %r14b, 8(%rdi) -; CHECK-SSE1-NEXT: movb %bpl, 7(%rdi) -; CHECK-SSE1-NEXT: movb %r11b, 6(%rdi) -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: andb %al, %r9b -; CHECK-SSE1-NEXT: notb %al -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: orb %r9b, %al -; CHECK-SSE1-NEXT: movb %r10b, 5(%rdi) +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil +; CHECK-SSE1-NEXT: andb %dil, %sil +; CHECK-SSE1-NEXT: notb %dil +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: orb %sil, %dil +; CHECK-SSE1-NEXT: movb %cl, 15(%rax) +; CHECK-SSE1-NEXT: movb %dl, 14(%rax) +; CHECK-SSE1-NEXT: movb %bl, 13(%rax) +; CHECK-SSE1-NEXT: movb %r13b, 12(%rax) +; CHECK-SSE1-NEXT: movb %r12b, 11(%rax) +; CHECK-SSE1-NEXT: movb %r15b, 10(%rax) +; CHECK-SSE1-NEXT: movb %r14b, 9(%rax) +; CHECK-SSE1-NEXT: movb %bpl, 8(%rax) +; CHECK-SSE1-NEXT: movb %r11b, 7(%rax) +; CHECK-SSE1-NEXT: movb %r10b, 6(%rax) ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: andb %cl, %r8b +; CHECK-SSE1-NEXT: andb %cl, %r9b ; CHECK-SSE1-NEXT: notb %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: orb %r8b, %cl -; CHECK-SSE1-NEXT: movb %al, 4(%rdi) -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-SSE1-NEXT: andb %al, %dl -; CHECK-SSE1-NEXT: notb %al -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: orb %dl, %al -; CHECK-SSE1-NEXT: movb %cl, 3(%rdi) +; CHECK-SSE1-NEXT: orb %r9b, %cl +; CHECK-SSE1-NEXT: movb %dil, 5(%rax) +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-SSE1-NEXT: andb %dl, %r8b +; CHECK-SSE1-NEXT: notb %dl +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl +; CHECK-SSE1-NEXT: orb %r8b, %dl +; CHECK-SSE1-NEXT: movb %cl, 4(%rax) ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-SSE1-NEXT: andb %cl, %dl +; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; CHECK-SSE1-NEXT: andb %cl, %sil ; CHECK-SSE1-NEXT: notb %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: orb %dl, %cl -; CHECK-SSE1-NEXT: movb %al, 2(%rdi) -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-SSE1-NEXT: andb %al, %dl -; CHECK-SSE1-NEXT: notb %al -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: orb %dl, %al -; CHECK-SSE1-NEXT: movb %cl, 1(%rdi) -; CHECK-SSE1-NEXT: movb %al, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: orb %sil, %cl +; CHECK-SSE1-NEXT: movb %dl, 3(%rax) +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; CHECK-SSE1-NEXT: andb %dl, %sil +; CHECK-SSE1-NEXT: notb %dl +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl +; CHECK-SSE1-NEXT: orb %sil, %dl +; CHECK-SSE1-NEXT: movb %cl, 2(%rax) +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; CHECK-SSE1-NEXT: andb %cl, %sil +; CHECK-SSE1-NEXT: notb %cl +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl +; CHECK-SSE1-NEXT: orb %sil, %cl +; CHECK-SSE1-NEXT: movb %dl, 1(%rax) +; CHECK-SSE1-NEXT: movb %cl, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 @@ -1010,6 +1016,7 @@ ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r14d @@ -1033,11 +1040,11 @@ ; CHECK-BASELINE-NEXT: notl %ebx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx ; CHECK-BASELINE-NEXT: orl %r9d, %ebx -; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: andl %eax, %r8d -; CHECK-BASELINE-NEXT: notl %eax -; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %ax -; CHECK-BASELINE-NEXT: orl %r8d, %eax +; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-BASELINE-NEXT: andl %edi, %r8d +; CHECK-BASELINE-NEXT: notl %edi +; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %di +; CHECK-BASELINE-NEXT: orl %r8d, %edi ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebp ; CHECK-BASELINE-NEXT: andl %ebp, %ecx ; CHECK-BASELINE-NEXT: notl %ebp @@ -1053,15 +1060,14 @@ ; CHECK-BASELINE-NEXT: notl %edx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx ; CHECK-BASELINE-NEXT: orl %esi, %edx -; CHECK-BASELINE-NEXT: movw %r14w, 14(%rdi) -; CHECK-BASELINE-NEXT: movw %r11w, 12(%rdi) -; CHECK-BASELINE-NEXT: movw %r10w, 10(%rdi) -; CHECK-BASELINE-NEXT: movw %bx, 8(%rdi) -; CHECK-BASELINE-NEXT: movw %ax, 6(%rdi) -; CHECK-BASELINE-NEXT: movw %bp, 4(%rdi) -; CHECK-BASELINE-NEXT: movw %cx, 2(%rdi) -; CHECK-BASELINE-NEXT: movw %dx, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movw %r14w, 14(%rax) +; CHECK-BASELINE-NEXT: movw %r11w, 12(%rax) +; CHECK-BASELINE-NEXT: movw %r10w, 10(%rax) +; CHECK-BASELINE-NEXT: movw %bx, 8(%rax) +; CHECK-BASELINE-NEXT: movw %di, 6(%rax) +; CHECK-BASELINE-NEXT: movw %bp, 4(%rax) +; CHECK-BASELINE-NEXT: movw %cx, 2(%rax) +; CHECK-BASELINE-NEXT: movw %dx, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %rbp @@ -1072,6 +1078,7 @@ ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %rbx +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r14d @@ -1095,11 +1102,11 @@ ; CHECK-SSE1-NEXT: notl %ebx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx ; CHECK-SSE1-NEXT: orl %r9d, %ebx -; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: andl %eax, %r8d -; CHECK-SSE1-NEXT: notl %eax -; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %ax -; CHECK-SSE1-NEXT: orl %r8d, %eax +; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-SSE1-NEXT: andl %edi, %r8d +; CHECK-SSE1-NEXT: notl %edi +; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %di +; CHECK-SSE1-NEXT: orl %r8d, %edi ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebp ; CHECK-SSE1-NEXT: andl %ebp, %ecx ; CHECK-SSE1-NEXT: notl %ebp @@ -1115,15 +1122,14 @@ ; CHECK-SSE1-NEXT: notl %edx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx ; CHECK-SSE1-NEXT: orl %esi, %edx -; CHECK-SSE1-NEXT: movw %r14w, 14(%rdi) -; CHECK-SSE1-NEXT: movw %r11w, 12(%rdi) -; CHECK-SSE1-NEXT: movw %r10w, 10(%rdi) -; CHECK-SSE1-NEXT: movw %bx, 8(%rdi) -; CHECK-SSE1-NEXT: movw %ax, 6(%rdi) -; CHECK-SSE1-NEXT: movw %bp, 4(%rdi) -; CHECK-SSE1-NEXT: movw %cx, 2(%rdi) -; CHECK-SSE1-NEXT: movw %dx, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movw %r14w, 14(%rax) +; CHECK-SSE1-NEXT: movw %r11w, 12(%rax) +; CHECK-SSE1-NEXT: movw %r10w, 10(%rax) +; CHECK-SSE1-NEXT: movw %bx, 8(%rax) +; CHECK-SSE1-NEXT: movw %di, 6(%rax) +; CHECK-SSE1-NEXT: movw %bp, 4(%rax) +; CHECK-SSE1-NEXT: movw %cx, 2(%rax) +; CHECK-SSE1-NEXT: movw %dx, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %rbp @@ -1151,47 +1157,47 @@ ; CHECK-BASELINE-LABEL: out_v4i32: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl (%rcx), %r8d ; CHECK-BASELINE-NEXT: movl 4(%rcx), %r9d -; CHECK-BASELINE-NEXT: movl 8(%rcx), %eax +; CHECK-BASELINE-NEXT: movl 8(%rcx), %edi ; CHECK-BASELINE-NEXT: movl 12(%rcx), %ecx ; CHECK-BASELINE-NEXT: movl 12(%rsi), %r10d ; CHECK-BASELINE-NEXT: andl %ecx, %r10d ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r11d -; CHECK-BASELINE-NEXT: andl %eax, %r11d +; CHECK-BASELINE-NEXT: andl %edi, %r11d ; CHECK-BASELINE-NEXT: movl 4(%rsi), %ebx ; CHECK-BASELINE-NEXT: andl %r9d, %ebx ; CHECK-BASELINE-NEXT: movl (%rsi), %esi ; CHECK-BASELINE-NEXT: andl %r8d, %esi ; CHECK-BASELINE-NEXT: notl %r8d ; CHECK-BASELINE-NEXT: notl %r9d -; CHECK-BASELINE-NEXT: notl %eax +; CHECK-BASELINE-NEXT: notl %edi ; CHECK-BASELINE-NEXT: notl %ecx ; CHECK-BASELINE-NEXT: andl 12(%rdx), %ecx ; CHECK-BASELINE-NEXT: orl %r10d, %ecx -; CHECK-BASELINE-NEXT: andl 8(%rdx), %eax -; CHECK-BASELINE-NEXT: orl %r11d, %eax +; CHECK-BASELINE-NEXT: andl 8(%rdx), %edi +; CHECK-BASELINE-NEXT: orl %r11d, %edi ; CHECK-BASELINE-NEXT: andl 4(%rdx), %r9d ; CHECK-BASELINE-NEXT: orl %ebx, %r9d ; CHECK-BASELINE-NEXT: andl (%rdx), %r8d ; CHECK-BASELINE-NEXT: orl %esi, %r8d -; CHECK-BASELINE-NEXT: movl %ecx, 12(%rdi) -; CHECK-BASELINE-NEXT: movl %eax, 8(%rdi) -; CHECK-BASELINE-NEXT: movl %r9d, 4(%rdi) -; CHECK-BASELINE-NEXT: movl %r8d, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movl %ecx, 12(%rax) +; CHECK-BASELINE-NEXT: movl %edi, 8(%rax) +; CHECK-BASELINE-NEXT: movl %r9d, 4(%rax) +; CHECK-BASELINE-NEXT: movl %r8d, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i32: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v4i32: @@ -1222,42 +1228,42 @@ define <4 x i32> @out_v4i32_undef(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind { ; CHECK-BASELINE-LABEL: out_v4i32_undef: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r8d ; CHECK-BASELINE-NEXT: movl (%rcx), %r9d ; CHECK-BASELINE-NEXT: movl 4(%rcx), %r10d -; CHECK-BASELINE-NEXT: movl 12(%rcx), %eax +; CHECK-BASELINE-NEXT: movl 12(%rcx), %edi ; CHECK-BASELINE-NEXT: andl 8(%rcx), %r8d ; CHECK-BASELINE-NEXT: movl 12(%rsi), %ecx -; CHECK-BASELINE-NEXT: andl %eax, %ecx +; CHECK-BASELINE-NEXT: andl %edi, %ecx ; CHECK-BASELINE-NEXT: movl 4(%rsi), %r11d ; CHECK-BASELINE-NEXT: andl %r10d, %r11d ; CHECK-BASELINE-NEXT: movl (%rsi), %esi ; CHECK-BASELINE-NEXT: andl %r9d, %esi ; CHECK-BASELINE-NEXT: notl %r9d ; CHECK-BASELINE-NEXT: notl %r10d -; CHECK-BASELINE-NEXT: notl %eax -; CHECK-BASELINE-NEXT: andl 12(%rdx), %eax -; CHECK-BASELINE-NEXT: orl %ecx, %eax +; CHECK-BASELINE-NEXT: notl %edi +; CHECK-BASELINE-NEXT: andl 12(%rdx), %edi +; CHECK-BASELINE-NEXT: orl %ecx, %edi ; CHECK-BASELINE-NEXT: andl 4(%rdx), %r10d ; CHECK-BASELINE-NEXT: orl %r11d, %r10d ; CHECK-BASELINE-NEXT: andl (%rdx), %r9d ; CHECK-BASELINE-NEXT: orl %esi, %r9d -; CHECK-BASELINE-NEXT: movl %r8d, 8(%rdi) -; CHECK-BASELINE-NEXT: movl %eax, 12(%rdi) -; CHECK-BASELINE-NEXT: movl %r10d, 4(%rdi) -; CHECK-BASELINE-NEXT: movl %r9d, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movl %r8d, 8(%rax) +; CHECK-BASELINE-NEXT: movl %edi, 12(%rax) +; CHECK-BASELINE-NEXT: movl %r10d, 4(%rax) +; CHECK-BASELINE-NEXT: movl %r9d, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i32_undef: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v4i32_undef: @@ -1288,29 +1294,29 @@ define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v2i64: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movq %r8, %rax ; CHECK-BASELINE-NEXT: andq %r9, %rsi ; CHECK-BASELINE-NEXT: andq %r8, %rdi -; CHECK-BASELINE-NEXT: notq %r8 +; CHECK-BASELINE-NEXT: notq %rax ; CHECK-BASELINE-NEXT: notq %r9 ; CHECK-BASELINE-NEXT: andq %rcx, %r9 ; CHECK-BASELINE-NEXT: orq %rsi, %r9 -; CHECK-BASELINE-NEXT: andq %rdx, %r8 -; CHECK-BASELINE-NEXT: orq %rdi, %r8 -; CHECK-BASELINE-NEXT: movq %r8, %rax +; CHECK-BASELINE-NEXT: andq %rdx, %rax +; CHECK-BASELINE-NEXT: orq %rdi, %rax ; CHECK-BASELINE-NEXT: movq %r9, %rdx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v2i64: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %r8, %rax ; CHECK-SSE1-NEXT: andq %r9, %rsi ; CHECK-SSE1-NEXT: andq %r8, %rdi -; CHECK-SSE1-NEXT: notq %r8 +; CHECK-SSE1-NEXT: notq %rax ; CHECK-SSE1-NEXT: notq %r9 ; CHECK-SSE1-NEXT: andq %rcx, %r9 ; CHECK-SSE1-NEXT: orq %rsi, %r9 -; CHECK-SSE1-NEXT: andq %rdx, %r8 -; CHECK-SSE1-NEXT: orq %rdi, %r8 -; CHECK-SSE1-NEXT: movq %r8, %rax +; CHECK-SSE1-NEXT: andq %rdx, %rax +; CHECK-SSE1-NEXT: orq %rdi, %rax ; CHECK-SSE1-NEXT: movq %r9, %rdx ; CHECK-SSE1-NEXT: retq ; @@ -2291,6 +2297,7 @@ ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl 4(%rcx), %r8d ; CHECK-BASELINE-NEXT: movl 8(%rcx), %r9d ; CHECK-BASELINE-NEXT: movl 12(%rcx), %r10d @@ -2303,51 +2310,50 @@ ; CHECK-BASELINE-NEXT: notl %ebp ; CHECK-BASELINE-NEXT: andl 28(%rdx), %ebp ; CHECK-BASELINE-NEXT: orl %r14d, %ebp -; CHECK-BASELINE-NEXT: movl 24(%rsi), %eax -; CHECK-BASELINE-NEXT: andl %ebx, %eax +; CHECK-BASELINE-NEXT: movl 24(%rsi), %edi +; CHECK-BASELINE-NEXT: andl %ebx, %edi ; CHECK-BASELINE-NEXT: notl %ebx ; CHECK-BASELINE-NEXT: andl 24(%rdx), %ebx -; CHECK-BASELINE-NEXT: orl %eax, %ebx -; CHECK-BASELINE-NEXT: movl 20(%rsi), %eax -; CHECK-BASELINE-NEXT: andl %r15d, %eax +; CHECK-BASELINE-NEXT: orl %edi, %ebx +; CHECK-BASELINE-NEXT: movl 20(%rsi), %edi +; CHECK-BASELINE-NEXT: andl %r15d, %edi ; CHECK-BASELINE-NEXT: notl %r15d ; CHECK-BASELINE-NEXT: andl 20(%rdx), %r15d -; CHECK-BASELINE-NEXT: orl %eax, %r15d -; CHECK-BASELINE-NEXT: movl 16(%rsi), %eax -; CHECK-BASELINE-NEXT: andl %r11d, %eax +; CHECK-BASELINE-NEXT: orl %edi, %r15d +; CHECK-BASELINE-NEXT: movl 16(%rsi), %edi +; CHECK-BASELINE-NEXT: andl %r11d, %edi ; CHECK-BASELINE-NEXT: notl %r11d ; CHECK-BASELINE-NEXT: andl 16(%rdx), %r11d -; CHECK-BASELINE-NEXT: orl %eax, %r11d -; CHECK-BASELINE-NEXT: movl 12(%rsi), %eax -; CHECK-BASELINE-NEXT: andl %r10d, %eax +; CHECK-BASELINE-NEXT: orl %edi, %r11d +; CHECK-BASELINE-NEXT: movl 12(%rsi), %edi +; CHECK-BASELINE-NEXT: andl %r10d, %edi ; CHECK-BASELINE-NEXT: notl %r10d ; CHECK-BASELINE-NEXT: andl 12(%rdx), %r10d -; CHECK-BASELINE-NEXT: orl %eax, %r10d -; CHECK-BASELINE-NEXT: movl 8(%rsi), %eax -; CHECK-BASELINE-NEXT: andl %r9d, %eax +; CHECK-BASELINE-NEXT: orl %edi, %r10d +; CHECK-BASELINE-NEXT: movl 8(%rsi), %edi +; CHECK-BASELINE-NEXT: andl %r9d, %edi ; CHECK-BASELINE-NEXT: notl %r9d ; CHECK-BASELINE-NEXT: andl 8(%rdx), %r9d -; CHECK-BASELINE-NEXT: orl %eax, %r9d -; CHECK-BASELINE-NEXT: movl 4(%rsi), %eax -; CHECK-BASELINE-NEXT: andl %r8d, %eax +; CHECK-BASELINE-NEXT: orl %edi, %r9d +; CHECK-BASELINE-NEXT: movl 4(%rsi), %edi +; CHECK-BASELINE-NEXT: andl %r8d, %edi ; CHECK-BASELINE-NEXT: notl %r8d ; CHECK-BASELINE-NEXT: andl 4(%rdx), %r8d -; CHECK-BASELINE-NEXT: orl %eax, %r8d -; CHECK-BASELINE-NEXT: movl (%rcx), %eax -; CHECK-BASELINE-NEXT: movl (%rsi), %ecx -; CHECK-BASELINE-NEXT: andl %eax, %ecx -; CHECK-BASELINE-NEXT: notl %eax -; CHECK-BASELINE-NEXT: andl (%rdx), %eax -; CHECK-BASELINE-NEXT: orl %ecx, %eax -; CHECK-BASELINE-NEXT: movl %ebp, 28(%rdi) -; CHECK-BASELINE-NEXT: movl %ebx, 24(%rdi) -; CHECK-BASELINE-NEXT: movl %r15d, 20(%rdi) -; CHECK-BASELINE-NEXT: movl %r11d, 16(%rdi) -; CHECK-BASELINE-NEXT: movl %r10d, 12(%rdi) -; CHECK-BASELINE-NEXT: movl %r9d, 8(%rdi) -; CHECK-BASELINE-NEXT: movl %r8d, 4(%rdi) -; CHECK-BASELINE-NEXT: movl %eax, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: orl %edi, %r8d +; CHECK-BASELINE-NEXT: movl (%rcx), %ecx +; CHECK-BASELINE-NEXT: movl (%rsi), %esi +; CHECK-BASELINE-NEXT: andl %ecx, %esi +; CHECK-BASELINE-NEXT: notl %ecx +; CHECK-BASELINE-NEXT: andl (%rdx), %ecx +; CHECK-BASELINE-NEXT: orl %esi, %ecx +; CHECK-BASELINE-NEXT: movl %ebp, 28(%rax) +; CHECK-BASELINE-NEXT: movl %ebx, 24(%rax) +; CHECK-BASELINE-NEXT: movl %r15d, 20(%rax) +; CHECK-BASELINE-NEXT: movl %r11d, 16(%rax) +; CHECK-BASELINE-NEXT: movl %r10d, 12(%rax) +; CHECK-BASELINE-NEXT: movl %r9d, 8(%rax) +; CHECK-BASELINE-NEXT: movl %r8d, 4(%rax) +; CHECK-BASELINE-NEXT: movl %ecx, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 @@ -2360,6 +2366,7 @@ ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %rbx +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movl 4(%rcx), %r8d ; CHECK-SSE1-NEXT: movl 8(%rcx), %r9d ; CHECK-SSE1-NEXT: movl 12(%rcx), %r10d @@ -2372,51 +2379,50 @@ ; CHECK-SSE1-NEXT: notl %ebp ; CHECK-SSE1-NEXT: andl 28(%rdx), %ebp ; CHECK-SSE1-NEXT: orl %r14d, %ebp -; CHECK-SSE1-NEXT: movl 24(%rsi), %eax -; CHECK-SSE1-NEXT: andl %ebx, %eax +; CHECK-SSE1-NEXT: movl 24(%rsi), %edi +; CHECK-SSE1-NEXT: andl %ebx, %edi ; CHECK-SSE1-NEXT: notl %ebx ; CHECK-SSE1-NEXT: andl 24(%rdx), %ebx -; CHECK-SSE1-NEXT: orl %eax, %ebx -; CHECK-SSE1-NEXT: movl 20(%rsi), %eax -; CHECK-SSE1-NEXT: andl %r15d, %eax +; CHECK-SSE1-NEXT: orl %edi, %ebx +; CHECK-SSE1-NEXT: movl 20(%rsi), %edi +; CHECK-SSE1-NEXT: andl %r15d, %edi ; CHECK-SSE1-NEXT: notl %r15d ; CHECK-SSE1-NEXT: andl 20(%rdx), %r15d -; CHECK-SSE1-NEXT: orl %eax, %r15d -; CHECK-SSE1-NEXT: movl 16(%rsi), %eax -; CHECK-SSE1-NEXT: andl %r11d, %eax +; CHECK-SSE1-NEXT: orl %edi, %r15d +; CHECK-SSE1-NEXT: movl 16(%rsi), %edi +; CHECK-SSE1-NEXT: andl %r11d, %edi ; CHECK-SSE1-NEXT: notl %r11d ; CHECK-SSE1-NEXT: andl 16(%rdx), %r11d -; CHECK-SSE1-NEXT: orl %eax, %r11d -; CHECK-SSE1-NEXT: movl 12(%rsi), %eax -; CHECK-SSE1-NEXT: andl %r10d, %eax +; CHECK-SSE1-NEXT: orl %edi, %r11d +; CHECK-SSE1-NEXT: movl 12(%rsi), %edi +; CHECK-SSE1-NEXT: andl %r10d, %edi ; CHECK-SSE1-NEXT: notl %r10d ; CHECK-SSE1-NEXT: andl 12(%rdx), %r10d -; CHECK-SSE1-NEXT: orl %eax, %r10d -; CHECK-SSE1-NEXT: movl 8(%rsi), %eax -; CHECK-SSE1-NEXT: andl %r9d, %eax +; CHECK-SSE1-NEXT: orl %edi, %r10d +; CHECK-SSE1-NEXT: movl 8(%rsi), %edi +; CHECK-SSE1-NEXT: andl %r9d, %edi ; CHECK-SSE1-NEXT: notl %r9d ; CHECK-SSE1-NEXT: andl 8(%rdx), %r9d -; CHECK-SSE1-NEXT: orl %eax, %r9d -; CHECK-SSE1-NEXT: movl 4(%rsi), %eax -; CHECK-SSE1-NEXT: andl %r8d, %eax +; CHECK-SSE1-NEXT: orl %edi, %r9d +; CHECK-SSE1-NEXT: movl 4(%rsi), %edi +; CHECK-SSE1-NEXT: andl %r8d, %edi ; CHECK-SSE1-NEXT: notl %r8d ; CHECK-SSE1-NEXT: andl 4(%rdx), %r8d -; CHECK-SSE1-NEXT: orl %eax, %r8d -; CHECK-SSE1-NEXT: movl (%rcx), %eax -; CHECK-SSE1-NEXT: movl (%rsi), %ecx -; CHECK-SSE1-NEXT: andl %eax, %ecx -; CHECK-SSE1-NEXT: notl %eax -; CHECK-SSE1-NEXT: andl (%rdx), %eax -; CHECK-SSE1-NEXT: orl %ecx, %eax -; CHECK-SSE1-NEXT: movl %ebp, 28(%rdi) -; CHECK-SSE1-NEXT: movl %ebx, 24(%rdi) -; CHECK-SSE1-NEXT: movl %r15d, 20(%rdi) -; CHECK-SSE1-NEXT: movl %r11d, 16(%rdi) -; CHECK-SSE1-NEXT: movl %r10d, 12(%rdi) -; CHECK-SSE1-NEXT: movl %r9d, 8(%rdi) -; CHECK-SSE1-NEXT: movl %r8d, 4(%rdi) -; CHECK-SSE1-NEXT: movl %eax, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: orl %edi, %r8d +; CHECK-SSE1-NEXT: movl (%rcx), %ecx +; CHECK-SSE1-NEXT: movl (%rsi), %esi +; CHECK-SSE1-NEXT: andl %ecx, %esi +; CHECK-SSE1-NEXT: notl %ecx +; CHECK-SSE1-NEXT: andl (%rdx), %ecx +; CHECK-SSE1-NEXT: orl %esi, %ecx +; CHECK-SSE1-NEXT: movl %ebp, 28(%rax) +; CHECK-SSE1-NEXT: movl %ebx, 24(%rax) +; CHECK-SSE1-NEXT: movl %r15d, 20(%rax) +; CHECK-SSE1-NEXT: movl %r11d, 16(%rax) +; CHECK-SSE1-NEXT: movl %r10d, 12(%rax) +; CHECK-SSE1-NEXT: movl %r9d, 8(%rax) +; CHECK-SSE1-NEXT: movl %r8d, 4(%rax) +; CHECK-SSE1-NEXT: movl %ecx, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 @@ -2457,70 +2463,70 @@ ; CHECK-BASELINE-LABEL: out_v4i64: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movq (%rcx), %r8 ; CHECK-BASELINE-NEXT: movq 8(%rcx), %r9 -; CHECK-BASELINE-NEXT: movq 16(%rcx), %rax +; CHECK-BASELINE-NEXT: movq 16(%rcx), %rdi ; CHECK-BASELINE-NEXT: movq 24(%rcx), %rcx ; CHECK-BASELINE-NEXT: movq 24(%rsi), %r10 ; CHECK-BASELINE-NEXT: andq %rcx, %r10 ; CHECK-BASELINE-NEXT: movq 16(%rsi), %r11 -; CHECK-BASELINE-NEXT: andq %rax, %r11 +; CHECK-BASELINE-NEXT: andq %rdi, %r11 ; CHECK-BASELINE-NEXT: movq 8(%rsi), %rbx ; CHECK-BASELINE-NEXT: andq %r9, %rbx ; CHECK-BASELINE-NEXT: movq (%rsi), %rsi ; CHECK-BASELINE-NEXT: andq %r8, %rsi ; CHECK-BASELINE-NEXT: notq %r8 ; CHECK-BASELINE-NEXT: notq %r9 -; CHECK-BASELINE-NEXT: notq %rax +; CHECK-BASELINE-NEXT: notq %rdi ; CHECK-BASELINE-NEXT: notq %rcx ; CHECK-BASELINE-NEXT: andq 24(%rdx), %rcx ; CHECK-BASELINE-NEXT: orq %r10, %rcx -; CHECK-BASELINE-NEXT: andq 16(%rdx), %rax -; CHECK-BASELINE-NEXT: orq %r11, %rax +; CHECK-BASELINE-NEXT: andq 16(%rdx), %rdi +; CHECK-BASELINE-NEXT: orq %r11, %rdi ; CHECK-BASELINE-NEXT: andq 8(%rdx), %r9 ; CHECK-BASELINE-NEXT: orq %rbx, %r9 ; CHECK-BASELINE-NEXT: andq (%rdx), %r8 ; CHECK-BASELINE-NEXT: orq %rsi, %r8 -; CHECK-BASELINE-NEXT: movq %rcx, 24(%rdi) -; CHECK-BASELINE-NEXT: movq %rax, 16(%rdi) -; CHECK-BASELINE-NEXT: movq %r9, 8(%rdi) -; CHECK-BASELINE-NEXT: movq %r8, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movq %rcx, 24(%rax) +; CHECK-BASELINE-NEXT: movq %rdi, 16(%rax) +; CHECK-BASELINE-NEXT: movq %r9, 8(%rax) +; CHECK-BASELINE-NEXT: movq %r8, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i64: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbx +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movq (%rcx), %r8 ; CHECK-SSE1-NEXT: movq 8(%rcx), %r9 -; CHECK-SSE1-NEXT: movq 16(%rcx), %rax +; CHECK-SSE1-NEXT: movq 16(%rcx), %rdi ; CHECK-SSE1-NEXT: movq 24(%rcx), %rcx ; CHECK-SSE1-NEXT: movq 24(%rsi), %r10 ; CHECK-SSE1-NEXT: andq %rcx, %r10 ; CHECK-SSE1-NEXT: movq 16(%rsi), %r11 -; CHECK-SSE1-NEXT: andq %rax, %r11 +; CHECK-SSE1-NEXT: andq %rdi, %r11 ; CHECK-SSE1-NEXT: movq 8(%rsi), %rbx ; CHECK-SSE1-NEXT: andq %r9, %rbx ; CHECK-SSE1-NEXT: movq (%rsi), %rsi ; CHECK-SSE1-NEXT: andq %r8, %rsi ; CHECK-SSE1-NEXT: notq %r8 ; CHECK-SSE1-NEXT: notq %r9 -; CHECK-SSE1-NEXT: notq %rax +; CHECK-SSE1-NEXT: notq %rdi ; CHECK-SSE1-NEXT: notq %rcx ; CHECK-SSE1-NEXT: andq 24(%rdx), %rcx ; CHECK-SSE1-NEXT: orq %r10, %rcx -; CHECK-SSE1-NEXT: andq 16(%rdx), %rax -; CHECK-SSE1-NEXT: orq %r11, %rax +; CHECK-SSE1-NEXT: andq 16(%rdx), %rdi +; CHECK-SSE1-NEXT: orq %r11, %rdi ; CHECK-SSE1-NEXT: andq 8(%rdx), %r9 ; CHECK-SSE1-NEXT: orq %rbx, %r9 ; CHECK-SSE1-NEXT: andq (%rdx), %r8 ; CHECK-SSE1-NEXT: orq %rsi, %r8 -; CHECK-SSE1-NEXT: movq %rcx, 24(%rdi) -; CHECK-SSE1-NEXT: movq %rax, 16(%rdi) -; CHECK-SSE1-NEXT: movq %r9, 8(%rdi) -; CHECK-SSE1-NEXT: movq %r8, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movq %rcx, 24(%rax) +; CHECK-SSE1-NEXT: movq %rdi, 16(%rax) +; CHECK-SSE1-NEXT: movq %r9, 8(%rax) +; CHECK-SSE1-NEXT: movq %r8, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: retq ; @@ -2565,10 +2571,11 @@ define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { ; CHECK-LABEL: in_v1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: andl %edx, %edi -; CHECK-NEXT: xorl %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: andl %edx, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %n0 = xor <1 x i8> %x, %y %n1 = and <1 x i8> %n0, %mask @@ -2583,25 +2590,27 @@ define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v2i8: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movl %edi, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi -; CHECK-BASELINE-NEXT: xorl %edx, %edi -; CHECK-BASELINE-NEXT: andl %r8d, %edi +; CHECK-BASELINE-NEXT: xorl %edx, %eax +; CHECK-BASELINE-NEXT: andl %r8d, %eax ; CHECK-BASELINE-NEXT: andl %r9d, %esi ; CHECK-BASELINE-NEXT: xorl %ecx, %esi -; CHECK-BASELINE-NEXT: xorl %edx, %edi -; CHECK-BASELINE-NEXT: movl %edi, %eax +; CHECK-BASELINE-NEXT: xorl %edx, %eax +; CHECK-BASELINE-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BASELINE-NEXT: movl %esi, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v2i8: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movl %edi, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi -; CHECK-SSE1-NEXT: xorl %edx, %edi -; CHECK-SSE1-NEXT: andl %r8d, %edi +; CHECK-SSE1-NEXT: xorl %edx, %eax +; CHECK-SSE1-NEXT: andl %r8d, %eax ; CHECK-SSE1-NEXT: andl %r9d, %esi ; CHECK-SSE1-NEXT: xorl %ecx, %esi -; CHECK-SSE1-NEXT: xorl %edx, %edi -; CHECK-SSE1-NEXT: movl %edi, %eax +; CHECK-SSE1-NEXT: xorl %edx, %eax +; CHECK-SSE1-NEXT: # kill: def $al killed $al killed $eax ; CHECK-SSE1-NEXT: movl %esi, %edx ; CHECK-SSE1-NEXT: retq ; @@ -2625,10 +2634,11 @@ define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { ; CHECK-LABEL: in_v1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: andl %edx, %edi -; CHECK-NEXT: xorl %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: andl %edx, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %n0 = xor <1 x i16> %x, %y %n1 = and <1 x i16> %n0, %mask @@ -2643,50 +2653,50 @@ define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v4i8: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: xorl %r9d, %esi +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: xorb %r11b, %cl -; CHECK-BASELINE-NEXT: xorb %r10b, %r8b +; CHECK-BASELINE-NEXT: xorb %r11b, %dl +; CHECK-BASELINE-NEXT: xorb %r10b, %cl +; CHECK-BASELINE-NEXT: xorb %dil, %r8b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %r9b, %sil -; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: xorb %r11b, %cl -; CHECK-BASELINE-NEXT: xorb %r10b, %r8b -; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) -; CHECK-BASELINE-NEXT: movb %cl, 2(%rdi) -; CHECK-BASELINE-NEXT: movb %dl, 1(%rdi) -; CHECK-BASELINE-NEXT: movb %sil, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: xorb %r11b, %dl +; CHECK-BASELINE-NEXT: xorb %r10b, %cl +; CHECK-BASELINE-NEXT: xorb %dil, %r8b +; CHECK-BASELINE-NEXT: movb %r8b, 3(%rax) +; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) +; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) +; CHECK-BASELINE-NEXT: movb %sil, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v4i8: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: xorl %r9d, %esi +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: xorb %r11b, %cl -; CHECK-SSE1-NEXT: xorb %r10b, %r8b +; CHECK-SSE1-NEXT: xorb %r11b, %dl +; CHECK-SSE1-NEXT: xorb %r10b, %cl +; CHECK-SSE1-NEXT: xorb %dil, %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %r9b, %sil -; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: xorb %r11b, %cl -; CHECK-SSE1-NEXT: xorb %r10b, %r8b -; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi) -; CHECK-SSE1-NEXT: movb %cl, 2(%rdi) -; CHECK-SSE1-NEXT: movb %dl, 1(%rdi) -; CHECK-SSE1-NEXT: movb %sil, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: xorb %r11b, %dl +; CHECK-SSE1-NEXT: xorb %r10b, %cl +; CHECK-SSE1-NEXT: xorb %dil, %r8b +; CHECK-SSE1-NEXT: movb %r8b, 3(%rax) +; CHECK-SSE1-NEXT: movb %cl, 2(%rax) +; CHECK-SSE1-NEXT: movb %dl, 1(%rax) +; CHECK-SSE1-NEXT: movb %sil, (%rax) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v4i8: @@ -2709,25 +2719,27 @@ define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v2i16: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movl %edi, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi -; CHECK-BASELINE-NEXT: xorl %edx, %edi -; CHECK-BASELINE-NEXT: andl %r8d, %edi +; CHECK-BASELINE-NEXT: xorl %edx, %eax +; CHECK-BASELINE-NEXT: andl %r8d, %eax ; CHECK-BASELINE-NEXT: andl %r9d, %esi ; CHECK-BASELINE-NEXT: xorl %ecx, %esi -; CHECK-BASELINE-NEXT: xorl %edx, %edi -; CHECK-BASELINE-NEXT: movl %edi, %eax +; CHECK-BASELINE-NEXT: xorl %edx, %eax +; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BASELINE-NEXT: movl %esi, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v2i16: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movl %edi, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi -; CHECK-SSE1-NEXT: xorl %edx, %edi -; CHECK-SSE1-NEXT: andl %r8d, %edi +; CHECK-SSE1-NEXT: xorl %edx, %eax +; CHECK-SSE1-NEXT: andl %r8d, %eax ; CHECK-SSE1-NEXT: andl %r9d, %esi ; CHECK-SSE1-NEXT: xorl %ecx, %esi -; CHECK-SSE1-NEXT: xorl %edx, %edi -; CHECK-SSE1-NEXT: movl %edi, %eax +; CHECK-SSE1-NEXT: xorl %edx, %eax +; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-SSE1-NEXT: movl %esi, %edx ; CHECK-SSE1-NEXT: retq ; @@ -2751,10 +2763,10 @@ define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { ; CHECK-LABEL: in_v1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: andl %edx, %edi -; CHECK-NEXT: xorl %esi, %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: andl %edx, %eax +; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: retq %n0 = xor <1 x i32> %x, %y %n1 = and <1 x i32> %n0, %mask @@ -2776,47 +2788,46 @@ ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movl %ecx, %r10d -; CHECK-BASELINE-NEXT: movl %edx, %r11d -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl -; CHECK-BASELINE-NEXT: xorb %bpl, %sil -; CHECK-BASELINE-NEXT: xorb %r13b, %r11b -; CHECK-BASELINE-NEXT: xorb %r12b, %r10b -; CHECK-BASELINE-NEXT: xorb %r15b, %r8b -; CHECK-BASELINE-NEXT: xorb %r14b, %r9b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl -; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %dl +; CHECK-BASELINE-NEXT: xorb %r13b, %sil +; CHECK-BASELINE-NEXT: xorb %r12b, %dl +; CHECK-BASELINE-NEXT: xorb %r15b, %r10b +; CHECK-BASELINE-NEXT: xorb %r14b, %r8b +; CHECK-BASELINE-NEXT: xorb %bpl, %r9b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: xorb %bl, %al +; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl -; CHECK-BASELINE-NEXT: xorb %bpl, %sil -; CHECK-BASELINE-NEXT: xorb %r13b, %r11b -; CHECK-BASELINE-NEXT: xorb %r12b, %r10b -; CHECK-BASELINE-NEXT: xorb %r15b, %r8b -; CHECK-BASELINE-NEXT: xorb %r14b, %r9b -; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %dl +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl +; CHECK-BASELINE-NEXT: xorb %r13b, %sil +; CHECK-BASELINE-NEXT: xorb %r12b, %dl +; CHECK-BASELINE-NEXT: xorb %r15b, %r10b +; CHECK-BASELINE-NEXT: xorb %r14b, %r8b +; CHECK-BASELINE-NEXT: xorb %bpl, %r9b +; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: xorb %bl, %al +; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: movb %al, 7(%rdi) ; CHECK-BASELINE-NEXT: movb %cl, 6(%rdi) -; CHECK-BASELINE-NEXT: movb %dl, 5(%rdi) +; CHECK-BASELINE-NEXT: movb %bl, 5(%rdi) ; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi) ; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) ; CHECK-BASELINE-NEXT: movb %r10b, 2(%rdi) -; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) +; CHECK-BASELINE-NEXT: movb %dl, 1(%rdi) ; CHECK-BASELINE-NEXT: movb %sil, (%rdi) ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: popq %rbx @@ -2836,47 +2847,46 @@ ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movl %ecx, %r10d -; CHECK-SSE1-NEXT: movl %edx, %r11d -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl -; CHECK-SSE1-NEXT: xorb %bpl, %sil -; CHECK-SSE1-NEXT: xorb %r13b, %r11b -; CHECK-SSE1-NEXT: xorb %r12b, %r10b -; CHECK-SSE1-NEXT: xorb %r15b, %r8b -; CHECK-SSE1-NEXT: xorb %r14b, %r9b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl -; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %dl +; CHECK-SSE1-NEXT: xorb %r13b, %sil +; CHECK-SSE1-NEXT: xorb %r12b, %dl +; CHECK-SSE1-NEXT: xorb %r15b, %r10b +; CHECK-SSE1-NEXT: xorb %r14b, %r8b +; CHECK-SSE1-NEXT: xorb %bpl, %r9b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: xorb %bl, %al +; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl -; CHECK-SSE1-NEXT: xorb %bpl, %sil -; CHECK-SSE1-NEXT: xorb %r13b, %r11b -; CHECK-SSE1-NEXT: xorb %r12b, %r10b -; CHECK-SSE1-NEXT: xorb %r15b, %r8b -; CHECK-SSE1-NEXT: xorb %r14b, %r9b -; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %dl +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl +; CHECK-SSE1-NEXT: xorb %r13b, %sil +; CHECK-SSE1-NEXT: xorb %r12b, %dl +; CHECK-SSE1-NEXT: xorb %r15b, %r10b +; CHECK-SSE1-NEXT: xorb %r14b, %r8b +; CHECK-SSE1-NEXT: xorb %bpl, %r9b +; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: xorb %bl, %al +; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: movb %al, 7(%rdi) ; CHECK-SSE1-NEXT: movb %cl, 6(%rdi) -; CHECK-SSE1-NEXT: movb %dl, 5(%rdi) +; CHECK-SSE1-NEXT: movb %bl, 5(%rdi) ; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi) ; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi) ; CHECK-SSE1-NEXT: movb %r10b, 2(%rdi) -; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) +; CHECK-SSE1-NEXT: movb %dl, 1(%rdi) ; CHECK-SSE1-NEXT: movb %sil, (%rdi) ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: popq %rbx @@ -2907,50 +2917,50 @@ define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v4i16: ; CHECK-BASELINE: # %bb.0: +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: xorl %r10d, %r8d ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d ; CHECK-BASELINE-NEXT: xorl %r11d, %ecx -; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: xorl %eax, %edx +; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-BASELINE-NEXT: xorl %edi, %edx ; CHECK-BASELINE-NEXT: xorl %r9d, %esi ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si ; CHECK-BASELINE-NEXT: xorl %r9d, %esi -; CHECK-BASELINE-NEXT: xorl %eax, %edx +; CHECK-BASELINE-NEXT: xorl %edi, %edx ; CHECK-BASELINE-NEXT: xorl %r11d, %ecx ; CHECK-BASELINE-NEXT: xorl %r10d, %r8d -; CHECK-BASELINE-NEXT: movw %r8w, 6(%rdi) -; CHECK-BASELINE-NEXT: movw %cx, 4(%rdi) -; CHECK-BASELINE-NEXT: movw %dx, 2(%rdi) -; CHECK-BASELINE-NEXT: movw %si, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) +; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) +; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) +; CHECK-BASELINE-NEXT: movw %si, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v4i16: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: xorl %r10d, %r8d ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d ; CHECK-SSE1-NEXT: xorl %r11d, %ecx -; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: xorl %eax, %edx +; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-SSE1-NEXT: xorl %edi, %edx ; CHECK-SSE1-NEXT: xorl %r9d, %esi ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si ; CHECK-SSE1-NEXT: xorl %r9d, %esi -; CHECK-SSE1-NEXT: xorl %eax, %edx +; CHECK-SSE1-NEXT: xorl %edi, %edx ; CHECK-SSE1-NEXT: xorl %r11d, %ecx ; CHECK-SSE1-NEXT: xorl %r10d, %r8d -; CHECK-SSE1-NEXT: movw %r8w, 6(%rdi) -; CHECK-SSE1-NEXT: movw %cx, 4(%rdi) -; CHECK-SSE1-NEXT: movw %dx, 2(%rdi) -; CHECK-SSE1-NEXT: movw %si, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) +; CHECK-SSE1-NEXT: movw %cx, 4(%rax) +; CHECK-SSE1-NEXT: movw %dx, 2(%rax) +; CHECK-SSE1-NEXT: movw %si, (%rax) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v4i16: @@ -2973,25 +2983,25 @@ define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v2i32: ; CHECK-BASELINE: # %bb.0: -; CHECK-BASELINE-NEXT: xorl %edx, %edi +; CHECK-BASELINE-NEXT: movl %edi, %eax +; CHECK-BASELINE-NEXT: xorl %edx, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi ; CHECK-BASELINE-NEXT: andl %r9d, %esi -; CHECK-BASELINE-NEXT: andl %r8d, %edi -; CHECK-BASELINE-NEXT: xorl %edx, %edi +; CHECK-BASELINE-NEXT: andl %r8d, %eax +; CHECK-BASELINE-NEXT: xorl %edx, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi -; CHECK-BASELINE-NEXT: movl %edi, %eax ; CHECK-BASELINE-NEXT: movl %esi, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v2i32: ; CHECK-SSE1: # %bb.0: -; CHECK-SSE1-NEXT: xorl %edx, %edi +; CHECK-SSE1-NEXT: movl %edi, %eax +; CHECK-SSE1-NEXT: xorl %edx, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi ; CHECK-SSE1-NEXT: andl %r9d, %esi -; CHECK-SSE1-NEXT: andl %r8d, %edi -; CHECK-SSE1-NEXT: xorl %edx, %edi +; CHECK-SSE1-NEXT: andl %r8d, %eax +; CHECK-SSE1-NEXT: xorl %edx, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi -; CHECK-SSE1-NEXT: movl %edi, %eax ; CHECK-SSE1-NEXT: movl %esi, %edx ; CHECK-SSE1-NEXT: retq ; @@ -3015,10 +3025,10 @@ define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { ; CHECK-LABEL: in_v1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: xorq %rsi, %rdi -; CHECK-NEXT: andq %rdx, %rdi -; CHECK-NEXT: xorq %rsi, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorq %rsi, %rax +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: xorq %rsi, %rax ; CHECK-NEXT: retq %n0 = xor <1 x i64> %x, %y %n1 = and <1 x i64> %n0, %mask @@ -3042,24 +3052,26 @@ ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-BASELINE-NEXT: movq %rdi, %rdx ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-BASELINE-NEXT: xorb %al, %r9b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: xorb %dil, %r9b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b -; CHECK-BASELINE-NEXT: xorb %al, %r9b -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl -; CHECK-BASELINE-NEXT: xorb %r10b, %dl -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl -; CHECK-BASELINE-NEXT: xorb %r10b, %dl +; CHECK-BASELINE-NEXT: xorb %dil, %r9b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: xorb %r10b, %dil +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: xorb %r10b, %dil ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: xorb %r11b, %r10b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b @@ -3069,13 +3081,9 @@ ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorb %bl, %r11b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl -; CHECK-BASELINE-NEXT: xorb %bpl, %bl +; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl -; CHECK-BASELINE-NEXT: xorb %bpl, %bl -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl -; CHECK-BASELINE-NEXT: xorb %r13b, %bpl -; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl -; CHECK-BASELINE-NEXT: xorb %r13b, %bpl +; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: xorb %r12b, %r13b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b @@ -3089,54 +3097,57 @@ ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b ; CHECK-BASELINE-NEXT: xorb %r14b, %r15b ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b -; CHECK-BASELINE-NEXT: xorb %sil, %r14b +; CHECK-BASELINE-NEXT: xorb %bpl, %r14b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b -; CHECK-BASELINE-NEXT: xorb %sil, %r14b +; CHECK-BASELINE-NEXT: xorb %bpl, %r14b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-BASELINE-NEXT: xorb %al, %bpl +; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl +; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %sil, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %sil, %cl -; CHECK-BASELINE-NEXT: movb %cl, 15(%rdi) -; CHECK-BASELINE-NEXT: movb %al, 14(%rdi) -; CHECK-BASELINE-NEXT: movb %r14b, 13(%rdi) -; CHECK-BASELINE-NEXT: movb %r15b, 12(%rdi) -; CHECK-BASELINE-NEXT: movb %r12b, 11(%rdi) -; CHECK-BASELINE-NEXT: movb %r13b, 10(%rdi) -; CHECK-BASELINE-NEXT: movb %bpl, 9(%rdi) -; CHECK-BASELINE-NEXT: movb %bl, 8(%rdi) -; CHECK-BASELINE-NEXT: movb %r11b, 7(%rdi) -; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdi) -; CHECK-BASELINE-NEXT: movb %dl, 5(%rdi) -; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi) +; CHECK-BASELINE-NEXT: movb %cl, 15(%rdx) +; CHECK-BASELINE-NEXT: movb %al, 14(%rdx) +; CHECK-BASELINE-NEXT: movb %bpl, 13(%rdx) +; CHECK-BASELINE-NEXT: movb %r14b, 12(%rdx) +; CHECK-BASELINE-NEXT: movb %r15b, 11(%rdx) +; CHECK-BASELINE-NEXT: movb %r12b, 10(%rdx) +; CHECK-BASELINE-NEXT: movb %r13b, 9(%rdx) +; CHECK-BASELINE-NEXT: movb %bl, 8(%rdx) +; CHECK-BASELINE-NEXT: movb %r11b, 7(%rdx) +; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdx) +; CHECK-BASELINE-NEXT: movb %dil, 5(%rdx) +; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdx) ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %al, %r8b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: xorb %al, %r8b -; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) +; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdx) ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: movb %cl, 2(%rdi) +; CHECK-BASELINE-NEXT: movb %cl, 2(%rdx) ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: movb %cl, 1(%rdi) +; CHECK-BASELINE-NEXT: movb %cl, 1(%rdx) ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: movb %cl, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movb %cl, (%rdx) +; CHECK-BASELINE-NEXT: movq %rdx, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 @@ -3156,24 +3167,26 @@ ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-SSE1-NEXT: movq %rdi, %rdx ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al -; CHECK-SSE1-NEXT: xorb %al, %r9b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: xorb %dil, %r9b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b -; CHECK-SSE1-NEXT: xorb %al, %r9b -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl -; CHECK-SSE1-NEXT: xorb %r10b, %dl -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl -; CHECK-SSE1-NEXT: xorb %r10b, %dl +; CHECK-SSE1-NEXT: xorb %dil, %r9b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: xorb %r10b, %dil +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: xorb %r10b, %dil ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: xorb %r11b, %r10b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b @@ -3183,13 +3196,9 @@ ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorb %bl, %r11b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl -; CHECK-SSE1-NEXT: xorb %bpl, %bl +; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl -; CHECK-SSE1-NEXT: xorb %bpl, %bl -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl -; CHECK-SSE1-NEXT: xorb %r13b, %bpl -; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl -; CHECK-SSE1-NEXT: xorb %r13b, %bpl +; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: xorb %r12b, %r13b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b @@ -3203,54 +3212,57 @@ ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b ; CHECK-SSE1-NEXT: xorb %r14b, %r15b ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b -; CHECK-SSE1-NEXT: xorb %sil, %r14b +; CHECK-SSE1-NEXT: xorb %bpl, %r14b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b -; CHECK-SSE1-NEXT: xorb %sil, %r14b +; CHECK-SSE1-NEXT: xorb %bpl, %r14b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-SSE1-NEXT: xorb %al, %bpl +; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl +; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl -; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %sil, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %sil, %cl -; CHECK-SSE1-NEXT: movb %cl, 15(%rdi) -; CHECK-SSE1-NEXT: movb %al, 14(%rdi) -; CHECK-SSE1-NEXT: movb %r14b, 13(%rdi) -; CHECK-SSE1-NEXT: movb %r15b, 12(%rdi) -; CHECK-SSE1-NEXT: movb %r12b, 11(%rdi) -; CHECK-SSE1-NEXT: movb %r13b, 10(%rdi) -; CHECK-SSE1-NEXT: movb %bpl, 9(%rdi) -; CHECK-SSE1-NEXT: movb %bl, 8(%rdi) -; CHECK-SSE1-NEXT: movb %r11b, 7(%rdi) -; CHECK-SSE1-NEXT: movb %r10b, 6(%rdi) -; CHECK-SSE1-NEXT: movb %dl, 5(%rdi) -; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi) +; CHECK-SSE1-NEXT: movb %cl, 15(%rdx) +; CHECK-SSE1-NEXT: movb %al, 14(%rdx) +; CHECK-SSE1-NEXT: movb %bpl, 13(%rdx) +; CHECK-SSE1-NEXT: movb %r14b, 12(%rdx) +; CHECK-SSE1-NEXT: movb %r15b, 11(%rdx) +; CHECK-SSE1-NEXT: movb %r12b, 10(%rdx) +; CHECK-SSE1-NEXT: movb %r13b, 9(%rdx) +; CHECK-SSE1-NEXT: movb %bl, 8(%rdx) +; CHECK-SSE1-NEXT: movb %r11b, 7(%rdx) +; CHECK-SSE1-NEXT: movb %r10b, 6(%rdx) +; CHECK-SSE1-NEXT: movb %dil, 5(%rdx) +; CHECK-SSE1-NEXT: movb %r9b, 4(%rdx) ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %al, %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: xorb %al, %r8b -; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi) +; CHECK-SSE1-NEXT: movb %r8b, 3(%rdx) ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: movb %cl, 2(%rdi) +; CHECK-SSE1-NEXT: movb %cl, 2(%rdx) ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: movb %cl, 1(%rdi) +; CHECK-SSE1-NEXT: movb %cl, 1(%rdx) ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: movb %cl, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movb %cl, (%rdx) +; CHECK-SSE1-NEXT: movq %rdx, %rax ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 @@ -3282,12 +3294,13 @@ ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: xorl %r10d, %r9d ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d ; CHECK-BASELINE-NEXT: xorl %r11d, %r8d -; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: xorl %eax, %ecx +; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-BASELINE-NEXT: xorl %edi, %ecx ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx ; CHECK-BASELINE-NEXT: xorl %ebx, %esi ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si @@ -3298,8 +3311,8 @@ ; CHECK-BASELINE-NEXT: xorl %ebx, %edx ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r14d ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx -; CHECK-BASELINE-NEXT: xorl %eax, %ecx -; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: xorl %edi, %ecx +; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w ; CHECK-BASELINE-NEXT: xorl %r11d, %r8d ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx @@ -3310,22 +3323,21 @@ ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bp ; CHECK-BASELINE-NEXT: xorl %ebx, %ebp ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx -; CHECK-BASELINE-NEXT: xorw %ax, %bx +; CHECK-BASELINE-NEXT: xorw %di, %bx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx -; CHECK-BASELINE-NEXT: xorl %eax, %ebx -; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: xorw %r14w, %ax -; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %ax -; CHECK-BASELINE-NEXT: xorl %r14d, %eax -; CHECK-BASELINE-NEXT: movw %ax, 14(%rdi) -; CHECK-BASELINE-NEXT: movw %bx, 12(%rdi) -; CHECK-BASELINE-NEXT: movw %bp, 10(%rdi) -; CHECK-BASELINE-NEXT: movw %r9w, 8(%rdi) -; CHECK-BASELINE-NEXT: movw %r8w, 6(%rdi) -; CHECK-BASELINE-NEXT: movw %cx, 4(%rdi) -; CHECK-BASELINE-NEXT: movw %dx, 2(%rdi) -; CHECK-BASELINE-NEXT: movw %si, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: xorl %edi, %ebx +; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-BASELINE-NEXT: xorw %r14w, %di +; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %di +; CHECK-BASELINE-NEXT: xorl %r14d, %edi +; CHECK-BASELINE-NEXT: movw %di, 14(%rax) +; CHECK-BASELINE-NEXT: movw %bx, 12(%rax) +; CHECK-BASELINE-NEXT: movw %bp, 10(%rax) +; CHECK-BASELINE-NEXT: movw %r9w, 8(%rax) +; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) +; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) +; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) +; CHECK-BASELINE-NEXT: movw %si, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %rbp @@ -3336,12 +3348,13 @@ ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %rbx +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: xorl %r10d, %r9d ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d ; CHECK-SSE1-NEXT: xorl %r11d, %r8d -; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: xorl %eax, %ecx +; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-SSE1-NEXT: xorl %edi, %ecx ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx ; CHECK-SSE1-NEXT: xorl %ebx, %esi ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si @@ -3352,8 +3365,8 @@ ; CHECK-SSE1-NEXT: xorl %ebx, %edx ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r14d ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx -; CHECK-SSE1-NEXT: xorl %eax, %ecx -; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: xorl %edi, %ecx +; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w ; CHECK-SSE1-NEXT: xorl %r11d, %r8d ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx @@ -3364,22 +3377,21 @@ ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bp ; CHECK-SSE1-NEXT: xorl %ebx, %ebp ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx -; CHECK-SSE1-NEXT: xorw %ax, %bx +; CHECK-SSE1-NEXT: xorw %di, %bx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx -; CHECK-SSE1-NEXT: xorl %eax, %ebx -; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: xorw %r14w, %ax -; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %ax -; CHECK-SSE1-NEXT: xorl %r14d, %eax -; CHECK-SSE1-NEXT: movw %ax, 14(%rdi) -; CHECK-SSE1-NEXT: movw %bx, 12(%rdi) -; CHECK-SSE1-NEXT: movw %bp, 10(%rdi) -; CHECK-SSE1-NEXT: movw %r9w, 8(%rdi) -; CHECK-SSE1-NEXT: movw %r8w, 6(%rdi) -; CHECK-SSE1-NEXT: movw %cx, 4(%rdi) -; CHECK-SSE1-NEXT: movw %dx, 2(%rdi) -; CHECK-SSE1-NEXT: movw %si, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: xorl %edi, %ebx +; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-SSE1-NEXT: xorw %r14w, %di +; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %di +; CHECK-SSE1-NEXT: xorl %r14d, %edi +; CHECK-SSE1-NEXT: movw %di, 14(%rax) +; CHECK-SSE1-NEXT: movw %bx, 12(%rax) +; CHECK-SSE1-NEXT: movw %bp, 10(%rax) +; CHECK-SSE1-NEXT: movw %r9w, 8(%rax) +; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) +; CHECK-SSE1-NEXT: movw %cx, 4(%rax) +; CHECK-SSE1-NEXT: movw %dx, 2(%rax) +; CHECK-SSE1-NEXT: movw %si, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %rbp @@ -3406,43 +3418,43 @@ ; CHECK-BASELINE-LABEL: in_v4i32: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl 12(%rdx), %r8d ; CHECK-BASELINE-NEXT: movl 8(%rdx), %r9d ; CHECK-BASELINE-NEXT: movl (%rdx), %r11d ; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d ; CHECK-BASELINE-NEXT: movl (%rsi), %edx ; CHECK-BASELINE-NEXT: xorl %r11d, %edx -; CHECK-BASELINE-NEXT: movl 4(%rsi), %eax -; CHECK-BASELINE-NEXT: xorl %r10d, %eax +; CHECK-BASELINE-NEXT: movl 4(%rsi), %edi +; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: movl 8(%rsi), %ebx ; CHECK-BASELINE-NEXT: xorl %r9d, %ebx ; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi ; CHECK-BASELINE-NEXT: xorl %r8d, %esi ; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi ; CHECK-BASELINE-NEXT: andl 8(%rcx), %ebx -; CHECK-BASELINE-NEXT: andl 4(%rcx), %eax +; CHECK-BASELINE-NEXT: andl 4(%rcx), %edi ; CHECK-BASELINE-NEXT: andl (%rcx), %edx ; CHECK-BASELINE-NEXT: xorl %r11d, %edx -; CHECK-BASELINE-NEXT: xorl %r10d, %eax +; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: xorl %r9d, %ebx ; CHECK-BASELINE-NEXT: xorl %r8d, %esi -; CHECK-BASELINE-NEXT: movl %esi, 12(%rdi) -; CHECK-BASELINE-NEXT: movl %ebx, 8(%rdi) -; CHECK-BASELINE-NEXT: movl %eax, 4(%rdi) -; CHECK-BASELINE-NEXT: movl %edx, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movl %esi, 12(%rax) +; CHECK-BASELINE-NEXT: movl %ebx, 8(%rax) +; CHECK-BASELINE-NEXT: movl %edi, 4(%rax) +; CHECK-BASELINE-NEXT: movl %edx, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v4i32: ; CHECK-SSE1: # %bb.0: +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1 ; CHECK-SSE1-NEXT: andps (%rsi), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v4i32: @@ -3472,25 +3484,25 @@ define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v2i64: ; CHECK-BASELINE: # %bb.0: -; CHECK-BASELINE-NEXT: xorq %rdx, %rdi +; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: xorq %rdx, %rax ; CHECK-BASELINE-NEXT: xorq %rcx, %rsi ; CHECK-BASELINE-NEXT: andq %r9, %rsi -; CHECK-BASELINE-NEXT: andq %r8, %rdi -; CHECK-BASELINE-NEXT: xorq %rdx, %rdi +; CHECK-BASELINE-NEXT: andq %r8, %rax +; CHECK-BASELINE-NEXT: xorq %rdx, %rax ; CHECK-BASELINE-NEXT: xorq %rcx, %rsi -; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movq %rsi, %rdx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v2i64: ; CHECK-SSE1: # %bb.0: -; CHECK-SSE1-NEXT: xorq %rdx, %rdi +; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: xorq %rdx, %rax ; CHECK-SSE1-NEXT: xorq %rcx, %rsi ; CHECK-SSE1-NEXT: andq %r9, %rsi -; CHECK-SSE1-NEXT: andq %r8, %rdi -; CHECK-SSE1-NEXT: xorq %rdx, %rdi +; CHECK-SSE1-NEXT: andq %r8, %rax +; CHECK-SSE1-NEXT: xorq %rdx, %rax ; CHECK-SSE1-NEXT: xorq %rcx, %rsi -; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movq %rsi, %rdx ; CHECK-SSE1-NEXT: retq ; @@ -4084,142 +4096,141 @@ ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx -; CHECK-BASELINE-NEXT: movq %rcx, %r8 -; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %eax -; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl 28(%rdx), %eax -; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %eax -; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movq %rcx, %r9 +; CHECK-BASELINE-NEXT: movq %rdi, %r10 +; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %edi +; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movl 28(%rdx), %edi +; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %edi +; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 24(%rdx), %eax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %eax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl 20(%rdx), %r13d -; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r11d +; CHECK-BASELINE-NEXT: movl 20(%rdx), %r11d ; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl 16(%rdx), %r14d +; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r14d ; CHECK-BASELINE-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %r15d +; CHECK-BASELINE-NEXT: movl 16(%rdx), %r15d ; CHECK-BASELINE-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl 12(%rdx), %r12d +; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %r12d ; CHECK-BASELINE-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r10d -; CHECK-BASELINE-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl 8(%rdx), %r9d -; CHECK-BASELINE-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %ebx +; CHECK-BASELINE-NEXT: movl 12(%rdx), %r13d +; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r8d +; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebx ; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl (%rdx), %eax -; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl 4(%rdx), %ebp +; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %ebp ; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %ecx +; CHECK-BASELINE-NEXT: movl (%rdx), %ecx ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movl 4(%rdx), %edi +; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %eax +; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl (%rsi), %edx -; CHECK-BASELINE-NEXT: xorw %ax, %dx +; CHECK-BASELINE-NEXT: xorw %cx, %dx ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %eax -; CHECK-BASELINE-NEXT: xorw %cx, %ax -; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %ecx +; CHECK-BASELINE-NEXT: xorw %ax, %cx +; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax -; CHECK-BASELINE-NEXT: xorw %bp, %ax +; CHECK-BASELINE-NEXT: xorw %di, %ax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %edx -; CHECK-BASELINE-NEXT: xorw %bx, %dx +; CHECK-BASELINE-NEXT: xorw %bp, %dx ; CHECK-BASELINE-NEXT: movl %edx, %eax ; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %ecx -; CHECK-BASELINE-NEXT: xorw %r9w, %cx -; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: xorw %bx, %cx ; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %edx -; CHECK-BASELINE-NEXT: xorw %r10w, %dx -; CHECK-BASELINE-NEXT: movl %edx, %ecx +; CHECK-BASELINE-NEXT: xorw %r8w, %dx +; CHECK-BASELINE-NEXT: movl %edx, %r8d ; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %edx -; CHECK-BASELINE-NEXT: xorw %r12w, %dx -; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %r12d +; CHECK-BASELINE-NEXT: xorw %r13w, %dx +; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %r13d +; CHECK-BASELINE-NEXT: xorw %r12w, %r13w +; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %r12d ; CHECK-BASELINE-NEXT: xorw %r15w, %r12w -; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %r15d +; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %r15d ; CHECK-BASELINE-NEXT: xorw %r14w, %r15w -; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %r14d +; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %r14d ; CHECK-BASELINE-NEXT: xorw %r11w, %r14w -; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %ebp -; CHECK-BASELINE-NEXT: xorw %r13w, %bp -; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %ebx +; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %ebp +; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload +; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %ebx ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload -; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %r11d +; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r11d ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload -; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r10d -; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r10w # 2-byte Folded Reload -; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %r9d -; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r9w # 2-byte Folded Reload -; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %r13d -; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r13w # 2-byte Folded Reload -; CHECK-BASELINE-NEXT: andw 30(%r8), %r13w -; CHECK-BASELINE-NEXT: andw 28(%r8), %r9w -; CHECK-BASELINE-NEXT: andw 26(%r8), %r10w -; CHECK-BASELINE-NEXT: andw 24(%r8), %r11w -; CHECK-BASELINE-NEXT: andw 22(%r8), %bx -; CHECK-BASELINE-NEXT: andw 20(%r8), %bp -; CHECK-BASELINE-NEXT: andw 18(%r8), %r14w -; CHECK-BASELINE-NEXT: andw 16(%r8), %r15w -; CHECK-BASELINE-NEXT: andw 14(%r8), %r12w -; CHECK-BASELINE-NEXT: andw 12(%r8), %dx +; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %edi +; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload +; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %esi +; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload +; CHECK-BASELINE-NEXT: andw 30(%r9), %si +; CHECK-BASELINE-NEXT: andw 28(%r9), %di +; CHECK-BASELINE-NEXT: andw 26(%r9), %r11w +; CHECK-BASELINE-NEXT: andw 24(%r9), %bx +; CHECK-BASELINE-NEXT: andw 22(%r9), %bp +; CHECK-BASELINE-NEXT: andw 20(%r9), %r14w +; CHECK-BASELINE-NEXT: andw 18(%r9), %r15w +; CHECK-BASELINE-NEXT: andw 16(%r9), %r12w +; CHECK-BASELINE-NEXT: andw 14(%r9), %r13w +; CHECK-BASELINE-NEXT: andw 12(%r9), %dx ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: andw 10(%r8), %cx -; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-BASELINE-NEXT: andw 8(%r8), %dx -; CHECK-BASELINE-NEXT: andw 6(%r8), %ax +; CHECK-BASELINE-NEXT: andw 10(%r9), %r8w +; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: movl %ecx, %edx +; CHECK-BASELINE-NEXT: andw 8(%r9), %dx +; CHECK-BASELINE-NEXT: andw 6(%r9), %ax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-BASELINE-NEXT: andw 4(%r8), %cx +; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload +; CHECK-BASELINE-NEXT: andw 4(%r9), %r8w ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; CHECK-BASELINE-NEXT: andw 2(%r8), %ax -; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload -; CHECK-BASELINE-NEXT: andw (%r8), %si -; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: movl %ecx, %esi +; CHECK-BASELINE-NEXT: andw 2(%r9), %ax ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-BASELINE-NEXT: andw (%r9), %cx ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload +; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload +; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: movl %edx, %r8d +; CHECK-BASELINE-NEXT: movl %edx, %ecx ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload -; CHECK-BASELINE-NEXT: movw %r13w, 30(%rdi) -; CHECK-BASELINE-NEXT: movw %r9w, 28(%rdi) -; CHECK-BASELINE-NEXT: movw %r10w, 26(%rdi) -; CHECK-BASELINE-NEXT: movw %r11w, 24(%rdi) -; CHECK-BASELINE-NEXT: movw %bx, 22(%rdi) -; CHECK-BASELINE-NEXT: movw %bp, 20(%rdi) -; CHECK-BASELINE-NEXT: movw %r14w, 18(%rdi) -; CHECK-BASELINE-NEXT: movw %r15w, 16(%rdi) -; CHECK-BASELINE-NEXT: movw %r12w, 14(%rdi) -; CHECK-BASELINE-NEXT: movw %ax, 12(%rdi) -; CHECK-BASELINE-NEXT: movw %dx, 10(%rdi) -; CHECK-BASELINE-NEXT: movw %r8w, 8(%rdi) -; CHECK-BASELINE-NEXT: movw %cx, 6(%rdi) -; CHECK-BASELINE-NEXT: movw %si, 4(%rdi) +; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload +; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-BASELINE-NEXT: movw %si, 30(%r10) +; CHECK-BASELINE-NEXT: movw %di, 28(%r10) +; CHECK-BASELINE-NEXT: movw %r11w, 26(%r10) +; CHECK-BASELINE-NEXT: movw %bx, 24(%r10) +; CHECK-BASELINE-NEXT: movw %bp, 22(%r10) +; CHECK-BASELINE-NEXT: movw %r14w, 20(%r10) +; CHECK-BASELINE-NEXT: movw %r15w, 18(%r10) +; CHECK-BASELINE-NEXT: movw %r12w, 16(%r10) +; CHECK-BASELINE-NEXT: movw %r13w, 14(%r10) +; CHECK-BASELINE-NEXT: movw %ax, 12(%r10) +; CHECK-BASELINE-NEXT: movw %dx, 10(%r10) +; CHECK-BASELINE-NEXT: movw %cx, 8(%r10) +; CHECK-BASELINE-NEXT: movw %r9w, 6(%r10) +; CHECK-BASELINE-NEXT: movw %r8w, 4(%r10) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; CHECK-BASELINE-NEXT: movw %ax, 2(%rdi) +; CHECK-BASELINE-NEXT: movw %ax, 2(%r10) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; CHECK-BASELINE-NEXT: movw %ax, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movw %ax, (%r10) +; CHECK-BASELINE-NEXT: movq %r10, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 @@ -4236,142 +4247,141 @@ ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx -; CHECK-SSE1-NEXT: movq %rcx, %r8 -; CHECK-SSE1-NEXT: movzwl 30(%rdx), %eax -; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl 28(%rdx), %eax -; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movzwl 26(%rdx), %eax -; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movq %rcx, %r9 +; CHECK-SSE1-NEXT: movq %rdi, %r10 +; CHECK-SSE1-NEXT: movzwl 30(%rdx), %edi +; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movl 28(%rdx), %edi +; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movzwl 26(%rdx), %edi +; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 24(%rdx), %eax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 22(%rdx), %eax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl 20(%rdx), %r13d -; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r11d +; CHECK-SSE1-NEXT: movl 20(%rdx), %r11d ; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl 16(%rdx), %r14d +; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r14d ; CHECK-SSE1-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movzwl 14(%rdx), %r15d +; CHECK-SSE1-NEXT: movl 16(%rdx), %r15d ; CHECK-SSE1-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl 12(%rdx), %r12d +; CHECK-SSE1-NEXT: movzwl 14(%rdx), %r12d ; CHECK-SSE1-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r10d -; CHECK-SSE1-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl 8(%rdx), %r9d -; CHECK-SSE1-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movzwl 6(%rdx), %ebx +; CHECK-SSE1-NEXT: movl 12(%rdx), %r13d +; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r8d +; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movl 8(%rdx), %ebx ; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl (%rdx), %eax -; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl 4(%rdx), %ebp +; CHECK-SSE1-NEXT: movzwl 6(%rdx), %ebp ; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movzwl 2(%rdx), %ecx +; CHECK-SSE1-NEXT: movl (%rdx), %ecx ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movl 4(%rdx), %edi +; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movzwl 2(%rdx), %eax +; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl (%rsi), %edx -; CHECK-SSE1-NEXT: xorw %ax, %dx +; CHECK-SSE1-NEXT: xorw %cx, %dx ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movzwl 2(%rsi), %eax -; CHECK-SSE1-NEXT: xorw %cx, %ax -; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movzwl 2(%rsi), %ecx +; CHECK-SSE1-NEXT: xorw %ax, %cx +; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax -; CHECK-SSE1-NEXT: xorw %bp, %ax +; CHECK-SSE1-NEXT: xorw %di, %ax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 6(%rsi), %edx -; CHECK-SSE1-NEXT: xorw %bx, %dx +; CHECK-SSE1-NEXT: xorw %bp, %dx ; CHECK-SSE1-NEXT: movl %edx, %eax ; CHECK-SSE1-NEXT: movzwl 8(%rsi), %ecx -; CHECK-SSE1-NEXT: xorw %r9w, %cx -; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: xorw %bx, %cx ; CHECK-SSE1-NEXT: movzwl 10(%rsi), %edx -; CHECK-SSE1-NEXT: xorw %r10w, %dx -; CHECK-SSE1-NEXT: movl %edx, %ecx +; CHECK-SSE1-NEXT: xorw %r8w, %dx +; CHECK-SSE1-NEXT: movl %edx, %r8d ; CHECK-SSE1-NEXT: movzwl 12(%rsi), %edx -; CHECK-SSE1-NEXT: xorw %r12w, %dx -; CHECK-SSE1-NEXT: movzwl 14(%rsi), %r12d +; CHECK-SSE1-NEXT: xorw %r13w, %dx +; CHECK-SSE1-NEXT: movzwl 14(%rsi), %r13d +; CHECK-SSE1-NEXT: xorw %r12w, %r13w +; CHECK-SSE1-NEXT: movzwl 16(%rsi), %r12d ; CHECK-SSE1-NEXT: xorw %r15w, %r12w -; CHECK-SSE1-NEXT: movzwl 16(%rsi), %r15d +; CHECK-SSE1-NEXT: movzwl 18(%rsi), %r15d ; CHECK-SSE1-NEXT: xorw %r14w, %r15w -; CHECK-SSE1-NEXT: movzwl 18(%rsi), %r14d +; CHECK-SSE1-NEXT: movzwl 20(%rsi), %r14d ; CHECK-SSE1-NEXT: xorw %r11w, %r14w -; CHECK-SSE1-NEXT: movzwl 20(%rsi), %ebp -; CHECK-SSE1-NEXT: xorw %r13w, %bp -; CHECK-SSE1-NEXT: movzwl 22(%rsi), %ebx +; CHECK-SSE1-NEXT: movzwl 22(%rsi), %ebp +; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload +; CHECK-SSE1-NEXT: movzwl 24(%rsi), %ebx ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload -; CHECK-SSE1-NEXT: movzwl 24(%rsi), %r11d +; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r11d ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload -; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r10d -; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r10w # 2-byte Folded Reload -; CHECK-SSE1-NEXT: movzwl 28(%rsi), %r9d -; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r9w # 2-byte Folded Reload -; CHECK-SSE1-NEXT: movzwl 30(%rsi), %r13d -; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r13w # 2-byte Folded Reload -; CHECK-SSE1-NEXT: andw 30(%r8), %r13w -; CHECK-SSE1-NEXT: andw 28(%r8), %r9w -; CHECK-SSE1-NEXT: andw 26(%r8), %r10w -; CHECK-SSE1-NEXT: andw 24(%r8), %r11w -; CHECK-SSE1-NEXT: andw 22(%r8), %bx -; CHECK-SSE1-NEXT: andw 20(%r8), %bp -; CHECK-SSE1-NEXT: andw 18(%r8), %r14w -; CHECK-SSE1-NEXT: andw 16(%r8), %r15w -; CHECK-SSE1-NEXT: andw 14(%r8), %r12w -; CHECK-SSE1-NEXT: andw 12(%r8), %dx +; CHECK-SSE1-NEXT: movzwl 28(%rsi), %edi +; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload +; CHECK-SSE1-NEXT: movzwl 30(%rsi), %esi +; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload +; CHECK-SSE1-NEXT: andw 30(%r9), %si +; CHECK-SSE1-NEXT: andw 28(%r9), %di +; CHECK-SSE1-NEXT: andw 26(%r9), %r11w +; CHECK-SSE1-NEXT: andw 24(%r9), %bx +; CHECK-SSE1-NEXT: andw 22(%r9), %bp +; CHECK-SSE1-NEXT: andw 20(%r9), %r14w +; CHECK-SSE1-NEXT: andw 18(%r9), %r15w +; CHECK-SSE1-NEXT: andw 16(%r9), %r12w +; CHECK-SSE1-NEXT: andw 14(%r9), %r13w +; CHECK-SSE1-NEXT: andw 12(%r9), %dx ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: andw 10(%r8), %cx -; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; CHECK-SSE1-NEXT: andw 8(%r8), %dx -; CHECK-SSE1-NEXT: andw 6(%r8), %ax +; CHECK-SSE1-NEXT: andw 10(%r9), %r8w +; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: movl %ecx, %edx +; CHECK-SSE1-NEXT: andw 8(%r9), %dx +; CHECK-SSE1-NEXT: andw 6(%r9), %ax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-SSE1-NEXT: andw 4(%r8), %cx +; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload +; CHECK-SSE1-NEXT: andw 4(%r9), %r8w ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; CHECK-SSE1-NEXT: andw 2(%r8), %ax -; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload -; CHECK-SSE1-NEXT: andw (%r8), %si -; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload -; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload -; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload -; CHECK-SSE1-NEXT: movl %ecx, %esi +; CHECK-SSE1-NEXT: andw 2(%r9), %ax ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-SSE1-NEXT: andw (%r9), %cx ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload +; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload +; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload -; CHECK-SSE1-NEXT: movl %edx, %r8d +; CHECK-SSE1-NEXT: movl %edx, %ecx ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload -; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload -; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload -; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload -; CHECK-SSE1-NEXT: movw %r13w, 30(%rdi) -; CHECK-SSE1-NEXT: movw %r9w, 28(%rdi) -; CHECK-SSE1-NEXT: movw %r10w, 26(%rdi) -; CHECK-SSE1-NEXT: movw %r11w, 24(%rdi) -; CHECK-SSE1-NEXT: movw %bx, 22(%rdi) -; CHECK-SSE1-NEXT: movw %bp, 20(%rdi) -; CHECK-SSE1-NEXT: movw %r14w, 18(%rdi) -; CHECK-SSE1-NEXT: movw %r15w, 16(%rdi) -; CHECK-SSE1-NEXT: movw %r12w, 14(%rdi) -; CHECK-SSE1-NEXT: movw %ax, 12(%rdi) -; CHECK-SSE1-NEXT: movw %dx, 10(%rdi) -; CHECK-SSE1-NEXT: movw %r8w, 8(%rdi) -; CHECK-SSE1-NEXT: movw %cx, 6(%rdi) -; CHECK-SSE1-NEXT: movw %si, 4(%rdi) +; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload +; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-SSE1-NEXT: movw %si, 30(%r10) +; CHECK-SSE1-NEXT: movw %di, 28(%r10) +; CHECK-SSE1-NEXT: movw %r11w, 26(%r10) +; CHECK-SSE1-NEXT: movw %bx, 24(%r10) +; CHECK-SSE1-NEXT: movw %bp, 22(%r10) +; CHECK-SSE1-NEXT: movw %r14w, 20(%r10) +; CHECK-SSE1-NEXT: movw %r15w, 18(%r10) +; CHECK-SSE1-NEXT: movw %r12w, 16(%r10) +; CHECK-SSE1-NEXT: movw %r13w, 14(%r10) +; CHECK-SSE1-NEXT: movw %ax, 12(%r10) +; CHECK-SSE1-NEXT: movw %dx, 10(%r10) +; CHECK-SSE1-NEXT: movw %cx, 8(%r10) +; CHECK-SSE1-NEXT: movw %r9w, 6(%r10) +; CHECK-SSE1-NEXT: movw %r8w, 4(%r10) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; CHECK-SSE1-NEXT: movw %ax, 2(%rdi) +; CHECK-SSE1-NEXT: movw %ax, 2(%r10) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; CHECK-SSE1-NEXT: movw %ax, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movw %ax, (%r10) +; CHECK-SSE1-NEXT: movq %r10, %rax ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 @@ -4581,62 +4591,62 @@ ; CHECK-BASELINE-LABEL: in_v4i64: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbx +; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movq 24(%rdx), %r8 ; CHECK-BASELINE-NEXT: movq 16(%rdx), %r9 ; CHECK-BASELINE-NEXT: movq (%rdx), %r11 ; CHECK-BASELINE-NEXT: movq 8(%rdx), %r10 ; CHECK-BASELINE-NEXT: movq (%rsi), %rdx ; CHECK-BASELINE-NEXT: xorq %r11, %rdx -; CHECK-BASELINE-NEXT: movq 8(%rsi), %rax -; CHECK-BASELINE-NEXT: xorq %r10, %rax +; CHECK-BASELINE-NEXT: movq 8(%rsi), %rdi +; CHECK-BASELINE-NEXT: xorq %r10, %rdi ; CHECK-BASELINE-NEXT: movq 16(%rsi), %rbx ; CHECK-BASELINE-NEXT: xorq %r9, %rbx ; CHECK-BASELINE-NEXT: movq 24(%rsi), %rsi ; CHECK-BASELINE-NEXT: xorq %r8, %rsi ; CHECK-BASELINE-NEXT: andq 24(%rcx), %rsi ; CHECK-BASELINE-NEXT: andq 16(%rcx), %rbx -; CHECK-BASELINE-NEXT: andq 8(%rcx), %rax +; CHECK-BASELINE-NEXT: andq 8(%rcx), %rdi ; CHECK-BASELINE-NEXT: andq (%rcx), %rdx ; CHECK-BASELINE-NEXT: xorq %r11, %rdx -; CHECK-BASELINE-NEXT: xorq %r10, %rax +; CHECK-BASELINE-NEXT: xorq %r10, %rdi ; CHECK-BASELINE-NEXT: xorq %r9, %rbx ; CHECK-BASELINE-NEXT: xorq %r8, %rsi -; CHECK-BASELINE-NEXT: movq %rsi, 24(%rdi) -; CHECK-BASELINE-NEXT: movq %rbx, 16(%rdi) -; CHECK-BASELINE-NEXT: movq %rax, 8(%rdi) -; CHECK-BASELINE-NEXT: movq %rdx, (%rdi) -; CHECK-BASELINE-NEXT: movq %rdi, %rax +; CHECK-BASELINE-NEXT: movq %rsi, 24(%rax) +; CHECK-BASELINE-NEXT: movq %rbx, 16(%rax) +; CHECK-BASELINE-NEXT: movq %rdi, 8(%rax) +; CHECK-BASELINE-NEXT: movq %rdx, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v4i64: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbx +; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movq 24(%rdx), %r8 ; CHECK-SSE1-NEXT: movq 16(%rdx), %r9 ; CHECK-SSE1-NEXT: movq (%rdx), %r11 ; CHECK-SSE1-NEXT: movq 8(%rdx), %r10 ; CHECK-SSE1-NEXT: movq (%rsi), %rdx ; CHECK-SSE1-NEXT: xorq %r11, %rdx -; CHECK-SSE1-NEXT: movq 8(%rsi), %rax -; CHECK-SSE1-NEXT: xorq %r10, %rax +; CHECK-SSE1-NEXT: movq 8(%rsi), %rdi +; CHECK-SSE1-NEXT: xorq %r10, %rdi ; CHECK-SSE1-NEXT: movq 16(%rsi), %rbx ; CHECK-SSE1-NEXT: xorq %r9, %rbx ; CHECK-SSE1-NEXT: movq 24(%rsi), %rsi ; CHECK-SSE1-NEXT: xorq %r8, %rsi ; CHECK-SSE1-NEXT: andq 24(%rcx), %rsi ; CHECK-SSE1-NEXT: andq 16(%rcx), %rbx -; CHECK-SSE1-NEXT: andq 8(%rcx), %rax +; CHECK-SSE1-NEXT: andq 8(%rcx), %rdi ; CHECK-SSE1-NEXT: andq (%rcx), %rdx ; CHECK-SSE1-NEXT: xorq %r11, %rdx -; CHECK-SSE1-NEXT: xorq %r10, %rax +; CHECK-SSE1-NEXT: xorq %r10, %rdi ; CHECK-SSE1-NEXT: xorq %r9, %rbx ; CHECK-SSE1-NEXT: xorq %r8, %rsi -; CHECK-SSE1-NEXT: movq %rsi, 24(%rdi) -; CHECK-SSE1-NEXT: movq %rbx, 16(%rdi) -; CHECK-SSE1-NEXT: movq %rax, 8(%rdi) -; CHECK-SSE1-NEXT: movq %rdx, (%rdi) -; CHECK-SSE1-NEXT: movq %rdi, %rax +; CHECK-SSE1-NEXT: movq %rsi, 24(%rax) +; CHECK-SSE1-NEXT: movq %rbx, 16(%rax) +; CHECK-SSE1-NEXT: movq %rdi, 8(%rax) +; CHECK-SSE1-NEXT: movq %rdx, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: retq ; Index: test/CodeGen/X86/urem-power-of-two.ll =================================================================== --- test/CodeGen/X86/urem-power-of-two.ll +++ test/CodeGen/X86/urem-power-of-two.ll @@ -14,8 +14,8 @@ ; ; X64-LABEL: const_pow_2: ; X64: # %bb.0: -; X64-NEXT: andl $31, %edi ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andl $31, %eax ; X64-NEXT: retq %urem = urem i64 %x, 32 ret i64 %urem @@ -35,8 +35,9 @@ ; ; X64-LABEL: shift_left_pow_2: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax ; X64-NEXT: addl $33554431, %eax # imm = 0x1FFFFFF ; X64-NEXT: andl %edi, %eax @@ -61,8 +62,9 @@ ; ; X64-LABEL: shift_right_pow_2: ; X64: # %bb.0: -; X64-NEXT: movl $32768, %eax # imm = 0x8000 ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $32768, %eax # imm = 0x8000 +; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrl %cl, %eax ; X64-NEXT: decl %eax ; X64-NEXT: andl %edi, %eax Index: test/CodeGen/X86/use-add-flags.ll =================================================================== --- test/CodeGen/X86/use-add-flags.ll +++ test/CodeGen/X86/use-add-flags.ll @@ -10,16 +10,16 @@ define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { ; LNX-LABEL: test1: ; LNX: # %bb.0: -; LNX-NEXT: addl (%rdi), %esi -; LNX-NEXT: cmovnsl %ecx, %edx ; LNX-NEXT: movl %edx, %eax +; LNX-NEXT: addl (%rdi), %esi +; LNX-NEXT: cmovnsl %ecx, %eax ; LNX-NEXT: retq ; ; WIN-LABEL: test1: ; WIN: # %bb.0: -; WIN-NEXT: addl (%rcx), %edx -; WIN-NEXT: cmovnsl %r9d, %r8d ; WIN-NEXT: movl %r8d, %eax +; WIN-NEXT: addl (%rcx), %edx +; WIN-NEXT: cmovnsl %r9d, %eax ; WIN-NEXT: retq %tmp2 = load i32, i32* %x, align 4 ; [#uses=1] %tmp4 = add i32 %tmp2, %y ; [#uses=1] Index: test/CodeGen/X86/vec_cast.ll =================================================================== --- test/CodeGen/X86/vec_cast.ll +++ test/CodeGen/X86/vec_cast.ll @@ -189,11 +189,13 @@ ; CHECK-LIN-LABEL: i: ; CHECK-LIN: # %bb.0: ; CHECK-LIN-NEXT: movl %edi, %eax +; CHECK-LIN-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: i: ; CHECK-WIN: # %bb.0: ; CHECK-WIN-NEXT: movl %ecx, %eax +; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-WIN-NEXT: retq %c = trunc <1 x i32> %a to <1 x i16> ret <1 x i16> %c Index: test/CodeGen/X86/vector-bitreverse.ll =================================================================== --- test/CodeGen/X86/vector-bitreverse.ll +++ test/CodeGen/X86/vector-bitreverse.ll @@ -14,38 +14,40 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; SSE-LABEL: test_bitreverse_i8: ; SSE: # %bb.0: -; SSE-NEXT: rolb $4, %dil -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $51, %al -; SSE-NEXT: shlb $2, %al -; SSE-NEXT: andb $-52, %dil -; SSE-NEXT: shrb $2, %dil -; SSE-NEXT: orb %al, %dil -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: andb $85, %al -; SSE-NEXT: addb %al, %al -; SSE-NEXT: andb $-86, %dil -; SSE-NEXT: shrb %dil -; SSE-NEXT: orb %al, %dil ; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: rolb $4, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $51, %cl +; SSE-NEXT: shlb $2, %cl +; SSE-NEXT: andb $-52, %al +; SSE-NEXT: shrb $2, %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: andb $85, %cl +; SSE-NEXT: addb %cl, %cl +; SSE-NEXT: andb $-86, %al +; SSE-NEXT: shrb %al +; SSE-NEXT: orb %cl, %al +; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_bitreverse_i8: ; AVX: # %bb.0: -; AVX-NEXT: rolb $4, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $51, %al -; AVX-NEXT: shlb $2, %al -; AVX-NEXT: andb $-52, %dil -; AVX-NEXT: shrb $2, %dil -; AVX-NEXT: orb %al, %dil -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: andb $85, %al -; AVX-NEXT: addb %al, %al -; AVX-NEXT: andb $-86, %dil -; AVX-NEXT: shrb %dil -; AVX-NEXT: orb %al, %dil ; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: rolb $4, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $51, %cl +; AVX-NEXT: shlb $2, %cl +; AVX-NEXT: andb $-52, %al +; AVX-NEXT: shrb $2, %al +; AVX-NEXT: orb %cl, %al +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: andb $85, %cl +; AVX-NEXT: addb %cl, %cl +; AVX-NEXT: andb $-86, %al +; AVX-NEXT: shrb %al +; AVX-NEXT: orb %cl, %al +; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; ; XOP-LABEL: test_bitreverse_i8: Index: test/CodeGen/X86/vector-blend.ll =================================================================== --- test/CodeGen/X86/vector-blend.ll +++ test/CodeGen/X86/vector-blend.ll @@ -338,30 +338,30 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { ; SSE2-LABEL: vsel_double8: ; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_double8: ; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 +; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_double8: ; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] ; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_double8: @@ -377,30 +377,30 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { ; SSE2-LABEL: vsel_i648: ; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm7, %xmm3 +; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movaps %xmm5, %xmm1 ; SSE2-NEXT: movapd %xmm6, %xmm2 -; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_i648: ; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm7, %xmm3 +; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1] ; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1] ; SSSE3-NEXT: movapd %xmm4, %xmm0 -; SSSE3-NEXT: movaps %xmm5, %xmm1 ; SSSE3-NEXT: movapd %xmm6, %xmm2 -; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_i648: ; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movaps %xmm7, %xmm3 +; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] ; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] -; SSE41-NEXT: movaps %xmm5, %xmm1 -; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_i648: @@ -528,22 +528,22 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { ; SSE2-LABEL: constant_blendvpd_avx: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSE2-NEXT: movapd %xmm3, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: constant_blendvpd_avx: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movaps %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1] ; SSSE3-NEXT: movapd %xmm3, %xmm1 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: constant_blendvpd_avx: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: constant_blendvpd_avx: @@ -740,20 +740,20 @@ define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) { ; SSE2-LABEL: blend_shufflevector_4xi64: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: blend_shufflevector_4xi64: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: blend_shufflevector_4xi64: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: blend_shufflevector_4xi64: Index: test/CodeGen/X86/vector-compare-results.ll =================================================================== --- test/CodeGen/X86/vector-compare-results.ll +++ test/CodeGen/X86/vector-compare-results.ll @@ -344,254 +344,254 @@ define <32 x i1> @test_cmp_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i8: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i8: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE42-NEXT: pcmpgtb %xmm3, %xmm1 -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: pextrb $1, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm0, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm0, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: pextrb $5, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm0, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $6, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm0, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i8: @@ -933,6 +933,7 @@ define <32 x i1> @test_cmp_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i16: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -940,253 +941,252 @@ ; SSE2-NEXT: pcmpgtw %xmm6, %xmm2 ; SSE2-NEXT: packsswb %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i16: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE42-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE42-NEXT: pcmpgtw %xmm7, %xmm3 ; SSE42-NEXT: pcmpgtw %xmm6, %xmm2 -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: pextrb $2, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: pextrb $0, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: pextrb $10, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: pextrb $12, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: pextrb $10, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm1, %edx +; SSE42-NEXT: pextrb $12, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i16: @@ -1247,500 +1247,501 @@ define <64 x i1> @test_cmp_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i8: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb %xmm4, %xmm0 ; SSE2-NEXT: pcmpgtb %xmm5, %xmm1 ; SSE2-NEXT: pcmpgtb %xmm6, %xmm2 ; SSE2-NEXT: pcmpgtb %xmm7, %xmm3 ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 6(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 6(%rdi) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 4(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 4(%rdi) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v64i8: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb %xmm4, %xmm0 ; SSE42-NEXT: pcmpgtb %xmm5, %xmm1 ; SSE42-NEXT: pcmpgtb %xmm6, %xmm2 ; SSE42-NEXT: pcmpgtb %xmm7, %xmm3 -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm3, %ecx +; SSE42-NEXT: pextrb $1, %xmm3, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: pextrb $5, %xmm3, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm2, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm2, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm2, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm2, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 6(%rdi) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm2, %ecx +; SSE42-NEXT: pextrb $5, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm2, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm1, %edx +; SSE42-NEXT: pextrb $6, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm2, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 4(%rdi) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm1, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm0, %edx -; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm0, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm0, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm0, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm0, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v64i8: ; AVX1: # %bb.0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4 @@ -1749,509 +1750,508 @@ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm2, %ecx +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm2, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm0, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm0, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm0, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm0, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm0, %edx +; AVX1-NEXT: vpextrb $0, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm4, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm1, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm4, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $4, %edx +; AVX1-NEXT: orl %ecx, %edx +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: shll $5, %ecx +; AVX1-NEXT: orl %edx, %ecx +; AVX1-NEXT: vpextrb $6, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm4, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm2, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 4(%rdi) +; AVX1-NEXT: vpextrb $1, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: vpextrb $0, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $4, %edx +; AVX1-NEXT: orl %ecx, %edx +; AVX1-NEXT: vpextrb $5, %xmm0, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: shll $5, %ecx +; AVX1-NEXT: orl %edx, %ecx +; AVX1-NEXT: vpextrb $6, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm0, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm4, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm4, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm4, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm4, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm4, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm4, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm4, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, (%rdi) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v64i8: ; AVX2: # %bb.0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: vpextrb $0, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $3, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm1, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $3, %xmm1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX2-NEXT: vpextrb $0, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 4(%rdi) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx +; AVX2-NEXT: vpextrb $0, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $3, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx +; AVX2-NEXT: vpextrb $6, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm0, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $3, %xmm0, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpextrb $0, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, (%rdi) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2394,6 +2394,7 @@ define <32 x i1> @test_cmp_v32f32(<32 x float> %a0, <32 x float> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32f32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11 ; SSE2-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10 @@ -2426,130 +2427,130 @@ ; SSE2-NEXT: packuswb %xmm11, %xmm9 ; SSE2-NEXT: packuswb %xmm10, %xmm9 ; SSE2-NEXT: movdqa %xmm9, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f32: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm15 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: movaps {{[0-9]+}}(%rsp), %xmm13 @@ -2566,125 +2567,124 @@ ; SSE42-NEXT: cmpltps %xmm6, %xmm13 ; SSE42-NEXT: cmpltps %xmm5, %xmm14 ; SSE42-NEXT: cmpltps %xmm4, %xmm15 -; SSE42-NEXT: pextrb $4, %xmm15, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm15, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm15, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm15, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm14, %ecx +; SSE42-NEXT: pextrb $4, %xmm15, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm14, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm14, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm14, %edx +; SSE42-NEXT: pextrb $0, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm13, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm13, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm13, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm12, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm12, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm12, %ecx +; SSE42-NEXT: pextrb $4, %xmm14, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm12, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm11, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm10, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm10, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm10, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm10, %edx +; SSE42-NEXT: pextrb $8, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm9, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm9, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm14, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm13, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm13, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm13, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm9, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm9, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm13, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm12, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm12, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm12, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm8, %ecx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm12, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $4, %xmm11, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm8, %edx +; SSE42-NEXT: pextrb $0, %xmm11, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm11, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm11, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm10, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm8, %ecx +; SSE42-NEXT: pextrb $4, %xmm10, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm8, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $8, %xmm10, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm10, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm9, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm9, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm9, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm9, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm8, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm8, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm8, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm8, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32f32: @@ -2954,6 +2954,7 @@ define <32 x i1> @test_cmp_v32i32(<32 x i32> %a0, <32 x i32> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255] ; SSE2-NEXT: pand %xmm8, %xmm3 @@ -2978,130 +2979,130 @@ ; SSE2-NEXT: packuswb %xmm5, %xmm4 ; SSE2-NEXT: packuswb %xmm6, %xmm4 ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i32: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm2 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm1 @@ -3110,125 +3111,124 @@ ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm4 -; SSE42-NEXT: pextrb $4, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm5, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm5, %ecx +; SSE42-NEXT: pextrb $4, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm5, %edx +; SSE42-NEXT: pextrb $0, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm6, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm6, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm7, %ecx +; SSE42-NEXT: pextrb $4, %xmm5, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $4, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $12, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $4, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: pextrb $8, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm2, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm6, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm2, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm3, %ecx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $4, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $8, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $12, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $12, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $12, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $4, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $12, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i32: @@ -3309,6 +3309,7 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; SSE2-LABEL: test_cmp_v64i16: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 @@ -3322,250 +3323,250 @@ ; SSE2-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: packsswb %xmm7, %xmm6 ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 6(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 6(%rdi) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 4(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 4(%rdi) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v64i16: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm3 @@ -3574,247 +3575,247 @@ ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm4 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm7 ; SSE42-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 -; SSE42-NEXT: pextrb $2, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm6, %ecx +; SSE42-NEXT: pextrb $2, %xmm6, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm6, %edx +; SSE42-NEXT: pextrb $0, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm7, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm7, %ecx +; SSE42-NEXT: pextrb $10, %xmm6, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 6(%rdi) -; SSE42-NEXT: pextrb $2, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm4, %edx +; SSE42-NEXT: pextrb $12, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm5, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm5, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm5, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm5, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 4(%rdi) -; SSE42-NEXT: pextrb $2, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm2, %ecx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 6(%rdi) +; SSE42-NEXT: pextrb $2, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: pextrb $0, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx +; SSE42-NEXT: pextrb $10, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $2, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $8, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $10, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: pextrb $12, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm5, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 4(%rdi) +; SSE42-NEXT: pextrb $2, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm1, %ecx +; SSE42-NEXT: pextrb $10, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $2, %xmm1, %edx +; SSE42-NEXT: pextrb $12, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $4, %xmm1, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $2, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $6, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $6, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: pextrb $10, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: pextrb $12, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $14, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $14, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $2, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $4, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $10, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $14, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v64i16: ; AVX1: # %bb.0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9 ; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8 @@ -3831,258 +3832,258 @@ ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm7, %xmm7 ; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm2 -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $4, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $6, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $8, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $12, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $14, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm7, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm7, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm7, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm7, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm7, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm7, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm7, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm7, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm4, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $4, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $6, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $8, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $12, %xmm0, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $14, %xmm0, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm5, %ecx +; AVX1-NEXT: vpextrb $2, %xmm2, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm5, %edx +; AVX1-NEXT: vpextrb $0, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm5, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $4, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm5, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $6, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm9, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $8, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm9, %ecx +; AVX1-NEXT: vpextrb $10, %xmm2, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm9, %edx +; AVX1-NEXT: vpextrb $12, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm9, %ecx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $14, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm7, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm7, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm7, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm7, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm7, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm3, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm3, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm3, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm3, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm4, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 4(%rdi) +; AVX1-NEXT: vpextrb $2, %xmm0, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm9, %edx +; AVX1-NEXT: vpextrb $0, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm9, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $4, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $2, %xmm8, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $6, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $6, %xmm8, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $8, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm8, %ecx +; AVX1-NEXT: vpextrb $10, %xmm0, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $10, %xmm8, %edx +; AVX1-NEXT: vpextrb $12, %xmm0, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $14, %xmm8, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $14, %xmm0, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $2, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $6, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $10, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $14, %xmm8, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, (%rdi) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v64i16: ; AVX2: # %bb.0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm5 ; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm1 ; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm4 @@ -4091,253 +4092,252 @@ ; AVX2-NEXT: vextracti128 $1, %ymm7, %xmm3 ; AVX2-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm6 ; AVX2-NEXT: vextracti128 $1, %ymm6, %xmm2 -; AVX2-NEXT: vpextrb $2, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $4, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $6, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $8, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $10, %xmm6, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $12, %xmm6, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $14, %xmm6, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm2, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm2, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm2, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm2, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm7, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm7, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm7, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm7, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm7, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm3, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm3, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm3, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm3, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $4, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $6, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $8, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $10, %xmm4, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $12, %xmm4, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $14, %xmm4, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm5, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm5, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm5, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm5, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm5, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $2, %xmm1, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; AVX2-NEXT: vpextrb $2, %xmm6, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $6, %xmm1, %edx +; AVX2-NEXT: vpextrb $0, %xmm6, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $10, %xmm1, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $4, %xmm6, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $14, %xmm1, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $6, %xmm6, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $8, %xmm6, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $4, %edx +; AVX2-NEXT: orl %ecx, %edx +; AVX2-NEXT: vpextrb $10, %xmm6, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: shll $5, %ecx +; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: vpextrb $12, %xmm6, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $14, %xmm6, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm7, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm7, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm7, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm7, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm7, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm3, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 4(%rdi) +; AVX2-NEXT: vpextrb $2, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: vpextrb $0, %xmm4, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $4, %xmm4, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX2-NEXT: vpextrb $6, %xmm4, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $8, %xmm4, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $4, %edx +; AVX2-NEXT: orl %ecx, %edx +; AVX2-NEXT: vpextrb $10, %xmm4, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: shll $5, %ecx +; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: vpextrb $12, %xmm4, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $14, %xmm4, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm5, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm5, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $0, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $2, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $6, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $10, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $14, %xmm1, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, (%rdi) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -4390,6 +4390,7 @@ ; SSE2-LABEL: test_cmp_v128i8: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rax +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -4399,491 +4400,491 @@ ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE2-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 ; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 14(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 14(%rdi) ; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 12(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 12(%rdi) ; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 10(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 10(%rdi) ; SSE2-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 8(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 8(%rdi) ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 6(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 6(%rdi) ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 4(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 4(%rdi) ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v128i8: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 @@ -4892,483 +4893,483 @@ ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm5 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 ; SSE42-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 -; SSE42-NEXT: pextrb $1, %xmm7, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm7, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm7, %ecx +; SSE42-NEXT: pextrb $1, %xmm7, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm7, %edx +; SSE42-NEXT: pextrb $0, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm7, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm7, %ecx +; SSE42-NEXT: pextrb $5, %xmm7, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm7, %edx +; SSE42-NEXT: pextrb $6, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm7, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm7, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 14(%rdi) -; SSE42-NEXT: pextrb $1, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm6, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm6, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm6, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm6, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm7, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm7, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm6, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 14(%rdi) +; SSE42-NEXT: pextrb $1, %xmm6, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm6, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 12(%rdi) -; SSE42-NEXT: pextrb $1, %xmm5, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm5, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm5, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm5, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm5, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm5, %ecx +; SSE42-NEXT: pextrb $5, %xmm6, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm5, %edx +; SSE42-NEXT: pextrb $6, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm5, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm5, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 10(%rdi) -; SSE42-NEXT: pextrb $1, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm4, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm4, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm4, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm4, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm4, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm6, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm4, %ecx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm6, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 12(%rdi) +; SSE42-NEXT: pextrb $1, %xmm5, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm4, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 8(%rdi) -; SSE42-NEXT: pextrb $1, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm3, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm3, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm3, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm3, %ecx +; SSE42-NEXT: pextrb $5, %xmm5, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm3, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 6(%rdi) -; SSE42-NEXT: pextrb $1, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm2, %edx +; SSE42-NEXT: pextrb $6, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm2, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm2, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm2, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm5, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm2, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 4(%rdi) -; SSE42-NEXT: pextrb $1, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm1, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm1, %ecx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm5, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 10(%rdi) +; SSE42-NEXT: pextrb $1, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm4, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm1, %edx +; SSE42-NEXT: pextrb $6, %xmm4, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm1, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm4, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 8(%rdi) +; SSE42-NEXT: pextrb $1, %xmm3, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm1, %edx +; SSE42-NEXT: pextrb $0, %xmm3, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm3, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm1, %edx +; SSE42-NEXT: pextrb $6, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm3, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 6(%rdi) +; SSE42-NEXT: pextrb $1, %xmm2, %ecx +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: pextrb $0, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm1, %ecx +; SSE42-NEXT: pextrb $5, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm1, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $1, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $2, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $3, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $5, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $6, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $7, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm2, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $8, %xmm0, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm2, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm2, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 4(%rdi) +; SSE42-NEXT: pextrb $1, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $9, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $10, %xmm0, %ecx +; SSE42-NEXT: pextrb $5, %xmm1, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $11, %xmm0, %edx +; SSE42-NEXT: pextrb $6, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $12, %xmm0, %ecx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm1, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm1, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $1, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $13, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $2, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $3, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $4, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $14, %xmm0, %ecx +; SSE42-NEXT: pextrb $5, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $15, %xmm0, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $6, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $7, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $8, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $9, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $10, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $11, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $12, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $13, %xmm0, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $14, %xmm0, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $15, %xmm0, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v128i8: ; AVX1: # %bb.0: +; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9 ; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8 @@ -5385,1010 +5386,1010 @@ ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm6, %xmm6 ; AVX1-NEXT: vpcmpgtb %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpextrb $1, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm3, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm3, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm3, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm6, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm6, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm6, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm6, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm6, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm6, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm6, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm6, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm6, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 12(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm2, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm2, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm5, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm5, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm5, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 8(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm1, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm1, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm4, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm4, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm4, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm9, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: vpextrb $0, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rcx,%rax,2), %eax -; AVX1-NEXT: vpextrb $2, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,4), %eax -; AVX1-NEXT: vpextrb $3, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: leal (%rax,%rcx,8), %eax -; AVX1-NEXT: vpextrb $4, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $4, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: vpextrb $5, %xmm9, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: shll $5, %eax -; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: vpextrb $6, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $6, %ecx -; AVX1-NEXT: vpextrb $7, %xmm9, %edx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $7, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm9, %ecx +; AVX1-NEXT: vpextrb $1, %xmm3, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $8, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm9, %edx +; AVX1-NEXT: vpextrb $0, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $9, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $10, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm9, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $11, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $12, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm9, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $13, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm9, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $14, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm9, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $15, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $0, %xmm8, %ecx +; AVX1-NEXT: vpextrb $5, %xmm3, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $16, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $1, %xmm8, %edx +; AVX1-NEXT: vpextrb $6, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $17, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $2, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $18, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $3, %xmm8, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm3, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $19, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $4, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $20, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $5, %xmm8, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm3, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm3, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $21, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $6, %xmm8, %ecx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm3, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm6, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm6, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm6, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 12(%rdi) +; AVX1-NEXT: vpextrb $1, %xmm2, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $22, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $7, %xmm8, %edx +; AVX1-NEXT: vpextrb $0, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $23, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $8, %xmm8, %ecx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $24, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $9, %xmm8, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $25, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $10, %xmm8, %ecx +; AVX1-NEXT: vpextrb $5, %xmm2, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $26, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $11, %xmm8, %edx +; AVX1-NEXT: vpextrb $6, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm2, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm2, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $27, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $12, %xmm8, %ecx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm2, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm5, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm5, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm5, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 8(%rdi) +; AVX1-NEXT: vpextrb $1, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: vpextrb $0, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $4, %edx +; AVX1-NEXT: orl %ecx, %edx +; AVX1-NEXT: vpextrb $5, %xmm1, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: shll $5, %ecx +; AVX1-NEXT: orl %edx, %ecx +; AVX1-NEXT: vpextrb $6, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm1, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm1, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm4, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm4, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm4, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, 4(%rdi) +; AVX1-NEXT: vpextrb $1, %xmm9, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $28, %ecx -; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $13, %xmm8, %edx +; AVX1-NEXT: vpextrb $0, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX1-NEXT: vpextrb $2, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: leal (%rcx,%rdx,4), %ecx +; AVX1-NEXT: vpextrb $3, %xmm9, %edx ; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: shll $29, %edx +; AVX1-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX1-NEXT: vpextrb $4, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $4, %edx ; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: vpextrb $14, %xmm8, %ecx +; AVX1-NEXT: vpextrb $5, %xmm9, %ecx ; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: shll $30, %ecx +; AVX1-NEXT: shll $5, %ecx ; AVX1-NEXT: orl %edx, %ecx -; AVX1-NEXT: vpextrb $15, %xmm8, %edx -; AVX1-NEXT: shll $31, %edx -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: orl %eax, %edx -; AVX1-NEXT: movl %edx, (%rdi) -; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: vpextrb $6, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $6, %edx +; AVX1-NEXT: vpextrb $7, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $7, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $8, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $9, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $10, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $11, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $12, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $13, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm9, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $14, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm9, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $15, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $0, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $16, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $1, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $17, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $2, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $18, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $3, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $19, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $4, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $20, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $5, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $21, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $6, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $22, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $7, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $23, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $8, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $24, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $9, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $25, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $10, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $26, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $11, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $27, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $12, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $28, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $13, %xmm8, %esi +; AVX1-NEXT: andl $1, %esi +; AVX1-NEXT: shll $29, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: vpextrb $14, %xmm8, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: shll $30, %edx +; AVX1-NEXT: orl %esi, %edx +; AVX1-NEXT: vpextrb $15, %xmm8, %esi +; AVX1-NEXT: shll $31, %esi +; AVX1-NEXT: orl %edx, %esi +; AVX1-NEXT: orl %ecx, %esi +; AVX1-NEXT: movl %esi, (%rdi) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_cmp_v128i8: ; AVX2: # %bb.0: +; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 -; AVX2-NEXT: vpextrb $1, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm3, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm3, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm3, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm3, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm3, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm3, %ecx +; AVX2-NEXT: vpextrb $1, %xmm3, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm3, %edx +; AVX2-NEXT: vpextrb $0, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm3 -; AVX2-NEXT: vpextrb $0, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm3, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx ; AVX2-NEXT: vpextrb $3, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm3, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm3, %ecx +; AVX2-NEXT: vpextrb $5, %xmm3, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm3, %edx +; AVX2-NEXT: vpextrb $6, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm3, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm3, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm3, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm3 +; AVX2-NEXT: vpextrb $0, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm3, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm3, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 12(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm2, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm2, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm2, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm2, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm3, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm3, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm3, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm2, %ecx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm3, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 12(%rdi) +; AVX2-NEXT: vpextrb $1, %xmm2, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm2, %edx +; AVX2-NEXT: vpextrb $0, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 -; AVX2-NEXT: vpextrb $0, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm2, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx ; AVX2-NEXT: vpextrb $3, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm2, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm2, %ecx +; AVX2-NEXT: vpextrb $5, %xmm2, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm2, %edx +; AVX2-NEXT: vpextrb $6, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm2, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm2, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm2, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 +; AVX2-NEXT: vpextrb $0, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm2, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm2, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 8(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm2, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm2, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm2, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm2, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 8(%rdi) +; AVX2-NEXT: vpextrb $1, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx +; AVX2-NEXT: vpextrb $0, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX2-NEXT: vpextrb $0, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm1, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx ; AVX2-NEXT: vpextrb $3, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm1, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; AVX2-NEXT: vpextrb $5, %xmm1, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm1, %edx +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm1, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX2-NEXT: vpextrb $0, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm1, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm1, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rcx,%rax,2), %eax -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,4), %eax -; AVX2-NEXT: vpextrb $3, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: leal (%rax,%rcx,8), %eax -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $4, %ecx -; AVX2-NEXT: orl %eax, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: shll $5, %eax -; AVX2-NEXT: orl %ecx, %eax -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $6, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $7, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $8, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $9, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $10, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $11, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $12, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm1, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm1, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm1, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $13, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm1, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, 4(%rdi) +; AVX2-NEXT: vpextrb $1, %xmm0, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $14, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx +; AVX2-NEXT: vpextrb $0, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $15, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $1, %xmm0, %edx +; AVX2-NEXT: leal (%rdx,%rcx,2), %ecx +; AVX2-NEXT: vpextrb $2, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $17, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $2, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $18, %ecx -; AVX2-NEXT: orl %edx, %ecx +; AVX2-NEXT: leal (%rcx,%rdx,4), %ecx ; AVX2-NEXT: vpextrb $3, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $19, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $4, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $20, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $5, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $21, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $6, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $22, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $7, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $23, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $8, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $24, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $9, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $25, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $10, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $26, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $11, %xmm0, %edx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $27, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $12, %xmm0, %ecx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $28, %ecx -; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $13, %xmm0, %edx +; AVX2-NEXT: leal (%rcx,%rdx,8), %ecx +; AVX2-NEXT: vpextrb $4, %xmm0, %edx ; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: shll $29, %edx +; AVX2-NEXT: shll $4, %edx ; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: vpextrb $14, %xmm0, %ecx +; AVX2-NEXT: vpextrb $5, %xmm0, %ecx ; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: shll $30, %ecx +; AVX2-NEXT: shll $5, %ecx ; AVX2-NEXT: orl %edx, %ecx -; AVX2-NEXT: vpextrb $15, %xmm0, %edx -; AVX2-NEXT: shll $31, %edx -; AVX2-NEXT: orl %ecx, %edx -; AVX2-NEXT: orl %eax, %edx -; AVX2-NEXT: movl %edx, (%rdi) -; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: vpextrb $6, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $6, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $7, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $8, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $9, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $10, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $11, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $12, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $13, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $14, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $15, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpextrb $0, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $16, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $1, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $17, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $2, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $18, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $3, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $19, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $4, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $20, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $5, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $21, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $6, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $22, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $7, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $23, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $24, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $9, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $25, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $10, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $26, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $11, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $27, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $12, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $28, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $13, %xmm0, %esi +; AVX2-NEXT: andl $1, %esi +; AVX2-NEXT: shll $29, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: vpextrb $14, %xmm0, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: shll $30, %edx +; AVX2-NEXT: orl %esi, %edx +; AVX2-NEXT: vpextrb $15, %xmm0, %esi +; AVX2-NEXT: shll $31, %esi +; AVX2-NEXT: orl %edx, %esi +; AVX2-NEXT: orl %ecx, %esi +; AVX2-NEXT: movl %esi, (%rdi) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512F-LABEL: test_cmp_v128i8: ; AVX512F: # %bb.0: +; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 @@ -6421,12 +6422,12 @@ ; AVX512F-NEXT: kmovw %k2, 4(%rdi) ; AVX512F-NEXT: kmovw %k1, 2(%rdi) ; AVX512F-NEXT: kmovw %k0, (%rdi) -; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v128i8: ; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512DQ-NEXT: vpmovd2m %zmm4, %k0 @@ -6459,7 +6460,6 @@ ; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) -; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -6481,6 +6481,7 @@ define <32 x i1> @test_cmp_v32f64(<32 x double> %a0, <32 x double> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32f64: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE2-NEXT: cmpltpd %xmm1, %xmm8 ; SSE2-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 @@ -6565,126 +6566,125 @@ ; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1] ; SSE2-NEXT: movapd %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movapd %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32f64: @@ -6695,7 +6695,7 @@ ; SSE42-NEXT: pushq %r13 ; SSE42-NEXT: pushq %r12 ; SSE42-NEXT: pushq %rbx -; SSE42-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; SSE42-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 ; SSE42-NEXT: cmpltpd %xmm7, %xmm8 ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 @@ -6730,24 +6730,24 @@ ; SSE42-NEXT: pextrb $8, %xmm0, %r9d ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 -; SSE42-NEXT: pextrb $0, %xmm0, %esi +; SSE42-NEXT: pextrb $0, %xmm0, %edx ; SSE42-NEXT: pextrb $8, %xmm0, %r12d ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 -; SSE42-NEXT: pextrb $0, %xmm0, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %esi ; SSE42-NEXT: pextrb $8, %xmm0, %ebx ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 -; SSE42-NEXT: pextrb $0, %xmm0, %eax +; SSE42-NEXT: pextrb $0, %xmm0, %ecx ; SSE42-NEXT: pextrb $8, %xmm0, %r13d ; SSE42-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 ; SSE42-NEXT: andl $1, %r8d ; SSE42-NEXT: andl $1, %r10d -; SSE42-NEXT: leal (%r10,%r8,2), %ecx +; SSE42-NEXT: leal (%r10,%r8,2), %eax ; SSE42-NEXT: andl $1, %ebp -; SSE42-NEXT: leal (%rcx,%rbp,4), %r8d -; SSE42-NEXT: pextrb $0, %xmm0, %ecx +; SSE42-NEXT: leal (%rax,%rbp,4), %r8d +; SSE42-NEXT: pextrb $0, %xmm0, %eax ; SSE42-NEXT: pextrb $8, %xmm0, %ebp ; SSE42-NEXT: andl $1, %edi ; SSE42-NEXT: leal (%r8,%rdi,8), %r8d @@ -6755,8 +6755,8 @@ ; SSE42-NEXT: shll $4, %r15d ; SSE42-NEXT: orl %r8d, %r15d ; SSE42-NEXT: pextrb $8, %xmm1, %edi -; SSE42-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill -; SSE42-NEXT: pextrb $0, %xmm1, %r10d +; SSE42-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE42-NEXT: pextrb $0, %xmm1, %r8d ; SSE42-NEXT: andl $1, %r11d ; SSE42-NEXT: shll $5, %r11d ; SSE42-NEXT: orl %r15d, %r11d @@ -6765,93 +6765,93 @@ ; SSE42-NEXT: andl $1, %r9d ; SSE42-NEXT: shll $7, %r9d ; SSE42-NEXT: orl %r14d, %r9d -; SSE42-NEXT: pextrb $0, %xmm2, %r14d +; SSE42-NEXT: pextrb $0, %xmm2, %r10d ; SSE42-NEXT: pextrb $8, %xmm2, %edi -; SSE42-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill -; SSE42-NEXT: andl $1, %esi -; SSE42-NEXT: shll $8, %esi -; SSE42-NEXT: orl %r9d, %esi +; SSE42-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %r9d, %edx ; SSE42-NEXT: andl $1, %r12d ; SSE42-NEXT: shll $9, %r12d -; SSE42-NEXT: orl %esi, %r12d -; SSE42-NEXT: pextrb $0, %xmm3, %r8d -; SSE42-NEXT: pextrb $8, %xmm3, %r15d -; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $10, %edx -; SSE42-NEXT: orl %r12d, %edx +; SSE42-NEXT: orl %edx, %r12d +; SSE42-NEXT: pextrb $0, %xmm3, %edi +; SSE42-NEXT: pextrb $8, %xmm3, %r9d +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $10, %esi +; SSE42-NEXT: orl %r12d, %esi ; SSE42-NEXT: andl $1, %ebx ; SSE42-NEXT: shll $11, %ebx -; SSE42-NEXT: orl %edx, %ebx -; SSE42-NEXT: pextrb $0, %xmm4, %r12d -; SSE42-NEXT: pextrb $8, %xmm4, %edi -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $12, %eax -; SSE42-NEXT: orl %ebx, %eax +; SSE42-NEXT: orl %esi, %ebx +; SSE42-NEXT: pextrb $0, %xmm4, %r15d +; SSE42-NEXT: pextrb $8, %xmm4, %r12d +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: shll $12, %ecx +; SSE42-NEXT: orl %ebx, %ecx ; SSE42-NEXT: andl $1, %r13d ; SSE42-NEXT: shll $13, %r13d -; SSE42-NEXT: orl %eax, %r13d -; SSE42-NEXT: pextrb $0, %xmm5, %eax +; SSE42-NEXT: orl %ecx, %r13d +; SSE42-NEXT: pextrb $0, %xmm5, %ecx ; SSE42-NEXT: pextrb $8, %xmm5, %ebx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx -; SSE42-NEXT: orl %r13d, %ecx +; SSE42-NEXT: andl $1, %eax +; SSE42-NEXT: shll $14, %eax +; SSE42-NEXT: orl %r13d, %eax ; SSE42-NEXT: shll $15, %ebp -; SSE42-NEXT: orl %ecx, %ebp +; SSE42-NEXT: orl %eax, %ebp ; SSE42-NEXT: pextrb $0, %xmm6, %r13d -; SSE42-NEXT: pextrb $8, %xmm6, %edx +; SSE42-NEXT: pextrb $8, %xmm6, %esi ; SSE42-NEXT: orl %r11d, %ebp -; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload -; SSE42-NEXT: movw %bp, 2(%r9) +; SSE42-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; SSE42-NEXT: movw %bp, 2(%r14) ; SSE42-NEXT: pextrb $0, %xmm7, %r11d -; SSE42-NEXT: pextrb $8, %xmm7, %ecx -; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: pextrb $8, %xmm7, %eax +; SSE42-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: andl $1, %r8d +; SSE42-NEXT: leal (%r8,%rdx,2), %r8d ; SSE42-NEXT: andl $1, %r10d -; SSE42-NEXT: leal (%r10,%rsi,2), %esi -; SSE42-NEXT: andl $1, %r14d -; SSE42-NEXT: leal (%rsi,%r14,4), %r14d +; SSE42-NEXT: leal (%r8,%r10,4), %r8d ; SSE42-NEXT: pextrb $0, %xmm8, %r10d ; SSE42-NEXT: pextrb $8, %xmm8, %ebp -; SSE42-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; SSE42-NEXT: andl $1, %esi -; SSE42-NEXT: leal (%r14,%rsi,8), %esi -; SSE42-NEXT: andl $1, %r8d -; SSE42-NEXT: shll $4, %r8d -; SSE42-NEXT: orl %esi, %r8d +; SSE42-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%r8,%rdx,8), %r8d +; SSE42-NEXT: andl $1, %edi +; SSE42-NEXT: shll $4, %edi +; SSE42-NEXT: orl %r8d, %edi +; SSE42-NEXT: andl $1, %r9d +; SSE42-NEXT: shll $5, %r9d +; SSE42-NEXT: orl %edi, %r9d ; SSE42-NEXT: andl $1, %r15d -; SSE42-NEXT: shll $5, %r15d -; SSE42-NEXT: orl %r8d, %r15d +; SSE42-NEXT: shll $6, %r15d ; SSE42-NEXT: andl $1, %r12d -; SSE42-NEXT: shll $6, %r12d -; SSE42-NEXT: andl $1, %edi -; SSE42-NEXT: shll $7, %edi -; SSE42-NEXT: orl %r12d, %edi -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $8, %eax -; SSE42-NEXT: orl %edi, %eax +; SSE42-NEXT: shll $7, %r12d +; SSE42-NEXT: orl %r15d, %r12d +; SSE42-NEXT: andl $1, %ecx +; SSE42-NEXT: shll $8, %ecx +; SSE42-NEXT: orl %r12d, %ecx ; SSE42-NEXT: andl $1, %ebx ; SSE42-NEXT: shll $9, %ebx -; SSE42-NEXT: orl %eax, %ebx +; SSE42-NEXT: orl %ecx, %ebx ; SSE42-NEXT: andl $1, %r13d ; SSE42-NEXT: shll $10, %r13d ; SSE42-NEXT: orl %ebx, %r13d -; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %r13d, %edx +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %r13d, %esi ; SSE42-NEXT: andl $1, %r11d ; SSE42-NEXT: shll $12, %r11d -; SSE42-NEXT: orl %edx, %r11d -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $13, %ecx -; SSE42-NEXT: orl %r11d, %ecx +; SSE42-NEXT: orl %esi, %r11d +; SSE42-NEXT: andl $1, %eax +; SSE42-NEXT: shll $13, %eax +; SSE42-NEXT: orl %r11d, %eax ; SSE42-NEXT: andl $1, %r10d ; SSE42-NEXT: shll $14, %r10d -; SSE42-NEXT: orl %ecx, %r10d +; SSE42-NEXT: orl %eax, %r10d ; SSE42-NEXT: shll $15, %ebp ; SSE42-NEXT: orl %r10d, %ebp -; SSE42-NEXT: orl %r15d, %ebp -; SSE42-NEXT: movw %bp, (%r9) -; SSE42-NEXT: movq %r9, %rax +; SSE42-NEXT: orl %r9d, %ebp +; SSE42-NEXT: movw %bp, (%r14) +; SSE42-NEXT: movq %r14, %rax ; SSE42-NEXT: popq %rbx ; SSE42-NEXT: popq %r12 ; SSE42-NEXT: popq %r13 @@ -6998,6 +6998,7 @@ define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { ; SSE2-LABEL: test_cmp_v32i64: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0] ; SSE2-NEXT: pxor %xmm8, %xmm1 ; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 @@ -7234,130 +7235,130 @@ ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE2-NEXT: movapd %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, 2(%rdi) +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, 2(%rdi) ; SSE2-NEXT: movapd %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rcx,%rax,2), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,4), %eax ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: leal (%rax,%rcx,8), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $4, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $1, %eax -; SSE2-NEXT: shll $5, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $6, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $7, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $8, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rdx,%rcx,2), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $9, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $10, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,4), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $11, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $12, %ecx -; SSE2-NEXT: orl %edx, %ecx +; SSE2-NEXT: leal (%rcx,%rdx,8), %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx ; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: shll $13, %edx +; SSE2-NEXT: shll $4, %edx ; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx ; SSE2-NEXT: andl $1, %ecx -; SSE2-NEXT: shll $14, %ecx +; SSE2-NEXT: shll $5, %ecx ; SSE2-NEXT: orl %edx, %ecx ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: shll $15, %edx -; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: movw %dx, (%rdi) -; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $6, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $7, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $8, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $9, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $10, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $11, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $12, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: andl $1, %esi +; SSE2-NEXT: shll $13, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE2-NEXT: andl $1, %edx +; SSE2-NEXT: shll $14, %edx +; SSE2-NEXT: orl %esi, %edx +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; SSE2-NEXT: shll $15, %esi +; SSE2-NEXT: orl %edx, %esi +; SSE2-NEXT: orl %ecx, %esi +; SSE2-NEXT: movw %si, (%rdi) ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_cmp_v32i64: ; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm15 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm13 @@ -7382,125 +7383,124 @@ ; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm13 ; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm14 ; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm15 -; SSE42-NEXT: pextrb $8, %xmm15, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm15, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $0, %xmm14, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $8, %xmm14, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm13, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $8, %xmm13, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $0, %xmm12, %ecx +; SSE42-NEXT: pextrb $8, %xmm15, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $8, %xmm12, %edx +; SSE42-NEXT: pextrb $0, %xmm15, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm11, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm11, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $0, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm10, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm10, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $8, %xmm14, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm9, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm9, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm13, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm8, %ecx +; SSE42-NEXT: pextrb $8, %xmm13, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm8, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, 2(%rdi) -; SSE42-NEXT: pextrb $8, %xmm0, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: pextrb $0, %xmm0, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rcx,%rax,2), %eax -; SSE42-NEXT: pextrb $0, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,4), %eax -; SSE42-NEXT: pextrb $8, %xmm1, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: leal (%rax,%rcx,8), %eax -; SSE42-NEXT: pextrb $0, %xmm2, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $4, %ecx -; SSE42-NEXT: orl %eax, %ecx -; SSE42-NEXT: pextrb $8, %xmm2, %eax -; SSE42-NEXT: andl $1, %eax -; SSE42-NEXT: shll $5, %eax -; SSE42-NEXT: orl %ecx, %eax -; SSE42-NEXT: pextrb $0, %xmm3, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $6, %ecx -; SSE42-NEXT: pextrb $8, %xmm3, %edx +; SSE42-NEXT: pextrb $0, %xmm12, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $7, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm4, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $8, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm4, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $8, %xmm12, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm11, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm11, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm10, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $9, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm5, %ecx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm10, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm9, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm9, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm8, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm8, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, 2(%rdi) +; SSE42-NEXT: pextrb $8, %xmm0, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $10, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm5, %edx +; SSE42-NEXT: pextrb $0, %xmm0, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $11, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm6, %ecx -; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $12, %ecx -; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm6, %edx +; SSE42-NEXT: leal (%rdx,%rcx,2), %ecx +; SSE42-NEXT: pextrb $0, %xmm1, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: leal (%rcx,%rdx,4), %ecx +; SSE42-NEXT: pextrb $8, %xmm1, %edx ; SSE42-NEXT: andl $1, %edx -; SSE42-NEXT: shll $13, %edx +; SSE42-NEXT: leal (%rcx,%rdx,8), %ecx +; SSE42-NEXT: pextrb $0, %xmm2, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $4, %edx ; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: pextrb $0, %xmm7, %ecx +; SSE42-NEXT: pextrb $8, %xmm2, %ecx ; SSE42-NEXT: andl $1, %ecx -; SSE42-NEXT: shll $14, %ecx +; SSE42-NEXT: shll $5, %ecx ; SSE42-NEXT: orl %edx, %ecx -; SSE42-NEXT: pextrb $8, %xmm7, %edx -; SSE42-NEXT: shll $15, %edx -; SSE42-NEXT: orl %ecx, %edx -; SSE42-NEXT: orl %eax, %edx -; SSE42-NEXT: movw %dx, (%rdi) -; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pextrb $0, %xmm3, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $6, %edx +; SSE42-NEXT: pextrb $8, %xmm3, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $7, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm4, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $8, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm4, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $9, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm5, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $10, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm5, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $11, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm6, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $12, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm6, %esi +; SSE42-NEXT: andl $1, %esi +; SSE42-NEXT: shll $13, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: pextrb $0, %xmm7, %edx +; SSE42-NEXT: andl $1, %edx +; SSE42-NEXT: shll $14, %edx +; SSE42-NEXT: orl %esi, %edx +; SSE42-NEXT: pextrb $8, %xmm7, %esi +; SSE42-NEXT: shll $15, %esi +; SSE42-NEXT: orl %edx, %esi +; SSE42-NEXT: orl %ecx, %esi +; SSE42-NEXT: movw %si, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: test_cmp_v32i64: Index: test/CodeGen/X86/vector-interleave.ll =================================================================== --- test/CodeGen/X86/vector-interleave.ll +++ test/CodeGen/X86/vector-interleave.ll @@ -10,6 +10,7 @@ define <64 x i16> @interleave8x8(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e, <8 x i16> %f, <8 x i16> %h, <8 x i16> %g) { ; SSE-LABEL: interleave8x8: ; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: movdqa %xmm0, %xmm8 ; SSE-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm1[0],xmm8[1],xmm1[1],xmm8[2],xmm1[2],xmm8[3],xmm1[3] ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] @@ -54,7 +55,6 @@ ; SSE-NEXT: movdqa %xmm1, 32(%rdi) ; SSE-NEXT: movdqa %xmm8, 16(%rdi) ; SSE-NEXT: movdqa %xmm5, (%rdi) -; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: interleave8x8: Index: test/CodeGen/X86/vector-pcmp.ll =================================================================== --- test/CodeGen/X86/vector-pcmp.ll +++ test/CodeGen/X86/vector-pcmp.ll @@ -86,10 +86,10 @@ define <1 x i128> @test_strange_type(<1 x i128> %x) { ; CHECK-LABEL: test_strange_type: ; CHECK: # %bb.0: -; CHECK-NEXT: sarq $63, %rsi -; CHECK-NEXT: notq %rsi ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: sarq $63, %rax +; CHECK-NEXT: notq %rax +; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: retq %sign = ashr <1 x i128> %x, %not = xor <1 x i128> %sign, Index: test/CodeGen/X86/vector-reduce-fadd-fast.ll =================================================================== --- test/CodeGen/X86/vector-reduce-fadd-fast.ll +++ test/CodeGen/X86/vector-reduce-fadd-fast.ll @@ -20,8 +20,8 @@ ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: -; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: haddps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: @@ -527,8 +527,8 @@ ; ; SSE41-LABEL: test_v2f64: ; SSE41: # %bb.0: -; SSE41-NEXT: haddpd %xmm1, %xmm1 ; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: haddpd %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f64: @@ -555,9 +555,9 @@ ; ; SSE41-LABEL: test_v4f64: ; SSE41: # %bb.0: -; SSE41-NEXT: addpd %xmm2, %xmm1 -; SSE41-NEXT: haddpd %xmm1, %xmm1 ; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: addpd %xmm2, %xmm0 +; SSE41-NEXT: haddpd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f64: @@ -594,11 +594,11 @@ ; ; SSE41-LABEL: test_v8f64: ; SSE41: # %bb.0: -; SSE41-NEXT: addpd %xmm4, %xmm2 -; SSE41-NEXT: addpd %xmm3, %xmm1 -; SSE41-NEXT: addpd %xmm2, %xmm1 -; SSE41-NEXT: haddpd %xmm1, %xmm1 ; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: addpd %xmm4, %xmm2 +; SSE41-NEXT: addpd %xmm3, %xmm0 +; SSE41-NEXT: addpd %xmm2, %xmm0 +; SSE41-NEXT: haddpd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f64: @@ -643,15 +643,15 @@ ; ; SSE41-LABEL: test_v16f64: ; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm4, %xmm0 ; SSE41-NEXT: addpd %xmm6, %xmm2 ; SSE41-NEXT: addpd %xmm7, %xmm3 ; SSE41-NEXT: addpd %xmm5, %xmm1 ; SSE41-NEXT: addpd %xmm3, %xmm1 -; SSE41-NEXT: addpd {{[0-9]+}}(%rsp), %xmm4 -; SSE41-NEXT: addpd %xmm2, %xmm4 -; SSE41-NEXT: addpd %xmm1, %xmm4 -; SSE41-NEXT: haddpd %xmm4, %xmm4 -; SSE41-NEXT: movapd %xmm4, %xmm0 +; SSE41-NEXT: addpd {{[0-9]+}}(%rsp), %xmm0 +; SSE41-NEXT: addpd %xmm2, %xmm0 +; SSE41-NEXT: addpd %xmm1, %xmm0 +; SSE41-NEXT: haddpd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f64: Index: test/CodeGen/X86/vector-shift-ashr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-128.ll +++ test/CodeGen/X86/vector-shift-ashr-128.ll @@ -257,32 +257,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: paddw %xmm1, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: paddw %xmm2, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psraw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psraw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psraw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shift-lshr-128.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-128.ll +++ test/CodeGen/X86/vector-shift-lshr-128.ll @@ -227,32 +227,33 @@ ; ; SSE41-LABEL: var_shift_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psllw $12, %xmm0 -; SSE41-NEXT: psllw $4, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: paddw %xmm1, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psllw $4, %xmm2 +; SSE41-NEXT: por %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: paddw %xmm2, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $4, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $4, %xmm2 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $2, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $2, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: psrlw $1, %xmm1 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm2 +; SSE41-NEXT: psrlw $1, %xmm2 ; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: var_shift_v8i16: Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -164,8 +164,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: shuffle_v2f64_22: ; SSE2: # %bb.0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_22: @@ -193,8 +193,8 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_32: ; SSE: # %bb.0: -; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0] ; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_32: @@ -208,8 +208,8 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_33: ; SSE: # %bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_33: @@ -309,8 +309,8 @@ define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_3u: ; SSE: # %bb.0: -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_3u: @@ -337,8 +337,8 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_02_copy: ; SSE: # %bb.0: -; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_02_copy: @@ -382,26 +382,26 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_03_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_03_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_03_copy: @@ -444,26 +444,26 @@ define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_12_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_12_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_12_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_12_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_12_copy: @@ -489,8 +489,8 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_13_copy: ; SSE: # %bb.0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_13_copy: @@ -517,8 +517,8 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_20_copy: ; SSE: # %bb.0: -; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_20_copy: @@ -559,26 +559,26 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_21_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_21_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_21_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] ; SSSE3-NEXT: movapd %xmm1, %xmm0 +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_21_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_21_copy: @@ -621,26 +621,26 @@ define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_30_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_30_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE3-NEXT: movapd %xmm2, %xmm0 +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_30_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_30_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_30_copy: @@ -667,8 +667,8 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_31_copy: ; SSE: # %bb.0: -; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_31_copy: Index: test/CodeGen/X86/vector-shuffle-combining-sse4a.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-sse4a.ll +++ test/CodeGen/X86/vector-shuffle-combining-sse4a.ll @@ -33,8 +33,8 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_16i8: ; SSSE3: # %bb.0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_16i8: @@ -54,8 +54,8 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_8i16: ; SSSE3: # %bb.0: -; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_8i16: Index: test/CodeGen/X86/vector-shuffle-combining-ssse3.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -574,8 +574,8 @@ define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: combine_unpckl_arg1_pshufb: ; SSE: # %bb.0: -; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero,xmm1[0],zero,zero,zero ; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero ; SSE-NEXT: retq ; ; AVX-LABEL: combine_unpckl_arg1_pshufb: Index: test/CodeGen/X86/vector-shuffle-combining.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining.ll +++ test/CodeGen/X86/vector-shuffle-combining.ll @@ -1624,8 +1624,8 @@ define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test1b: ; SSE: # %bb.0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test1b: @@ -1640,8 +1640,8 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: combine_test2b: ; SSE2: # %bb.0: -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: combine_test2b: @@ -1695,8 +1695,8 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test4b: ; SSE: # %bb.0: -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test4b: @@ -2766,30 +2766,30 @@ define <8 x float> @PR22412(<8 x float> %a, <8 x float> %b) { ; SSE2-LABEL: PR22412: ; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movaps %xmm3, %xmm1 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: PR22412: ; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movaps %xmm3, %xmm1 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2] -; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2] -; SSSE3-NEXT: movaps %xmm3, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[3,2] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: PR22412: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2] -; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2] -; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm3[3,2] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[3,2] +; SSE41-NEXT: movaps %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: PR22412: Index: test/CodeGen/X86/vector-zext.ll =================================================================== --- test/CodeGen/X86/vector-zext.ll +++ test/CodeGen/X86/vector-zext.ll @@ -2123,6 +2123,7 @@ define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) { ; SSE2-LABEL: zext_32i8_to_32i32: ; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2150,11 +2151,11 @@ ; SSE2-NEXT: movdqa %xmm5, 32(%rdi) ; SSE2-NEXT: movdqa %xmm3, 16(%rdi) ; SSE2-NEXT: movdqa %xmm8, (%rdi) -; SSE2-NEXT: movq %rdi, %rax ; SSE2-NEXT: retq ; ; SSSE3-LABEL: zext_32i8_to_32i32: ; SSSE3: # %bb.0: +; SSSE3-NEXT: movq %rdi, %rax ; SSSE3-NEXT: pxor %xmm2, %xmm2 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] @@ -2182,11 +2183,11 @@ ; SSSE3-NEXT: movdqa %xmm5, 32(%rdi) ; SSSE3-NEXT: movdqa %xmm3, 16(%rdi) ; SSSE3-NEXT: movdqa %xmm8, (%rdi) -; SSSE3-NEXT: movq %rdi, %rax ; SSSE3-NEXT: retq ; ; SSE41-LABEL: zext_32i8_to_32i32: ; SSE41: # %bb.0: +; SSE41-NEXT: movq %rdi, %rax ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero @@ -2209,7 +2210,6 @@ ; SSE41-NEXT: movdqa %xmm4, 32(%rdi) ; SSE41-NEXT: movdqa %xmm3, 16(%rdi) ; SSE41-NEXT: movdqa %xmm2, (%rdi) -; SSE41-NEXT: movq %rdi, %rax ; SSE41-NEXT: retq ; ; AVX1-LABEL: zext_32i8_to_32i32: Index: test/CodeGen/X86/vectorcall.ll =================================================================== --- test/CodeGen/X86/vectorcall.ll +++ test/CodeGen/X86/vectorcall.ll @@ -22,7 +22,8 @@ } ; X86-LABEL: {{^}}test_int_3@@8: ; X64-LABEL: {{^}}test_int_3@@8: -; CHECK: movl %ecx, %eax +; X86: movl %ecx, %eax +; X64: movq %rcx, %rax define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) { %s = add i32 %a, %b @@ -148,8 +149,8 @@ ret <4 x float> %0 } ; CHECK-LABEL: test_mixed_5 -; CHECK: movaps %xmm5, 16(%{{(e|r)}}sp) -; CHECK: movaps %xmm5, %xmm0 +; CHECK-DAG: movaps %xmm{{[0,5]}}, 16(%{{(e|r)}}sp) +; CHECK-DAG: movaps %xmm5, %xmm0 ; CHECK: ret{{[ql]}} define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) { @@ -183,12 +184,12 @@ ret void } ; CHECK-LABEL: test_mixed_7 +; X64: mov{{[ql]}} %rcx, %rax ; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}}) ; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}}) ; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}}) ; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}}) ; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}}) -; X64: mov{{[ql]}} %rcx, %rax ; CHECK: ret{{[ql]}} define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) { Index: test/CodeGen/X86/vselect-minmax.ll =================================================================== --- test/CodeGen/X86/vselect-minmax.ll +++ test/CodeGen/X86/vselect-minmax.ll @@ -4535,23 +4535,24 @@ ; ; SSE4-LABEL: test121: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm5, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm6, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: movdqa %xmm8, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test121: @@ -4655,23 +4656,24 @@ ; ; SSE4-LABEL: test122: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm5, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm6, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: movdqa %xmm8, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test122: @@ -4775,9 +4777,10 @@ ; ; SSE4-LABEL: test123: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -4785,12 +4788,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test123: @@ -4894,9 +4897,10 @@ ; ; SSE4-LABEL: test124: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -4904,12 +4908,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test124: @@ -5013,36 +5017,39 @@ ; ; SSE4-LABEL: test125: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm0, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test125: @@ -5160,36 +5167,39 @@ ; ; SSE4-LABEL: test126: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm0, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test126: @@ -5307,35 +5317,38 @@ ; ; SSE4-LABEL: test127: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm4, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm5, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: pxor %xmm7, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm6, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm1 ; SSE4-NEXT: movdqa %xmm2, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm3, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm5, %xmm0 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test127: @@ -5453,35 +5466,38 @@ ; ; SSE4-LABEL: test128: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm4, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm5, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: pxor %xmm7, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm6, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm1 ; SSE4-NEXT: movdqa %xmm2, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm3, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm5, %xmm0 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test128: @@ -6977,9 +6993,10 @@ ; ; SSE4-LABEL: test153: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -6987,12 +7004,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test153: @@ -7096,9 +7113,10 @@ ; ; SSE4-LABEL: test154: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -7106,12 +7124,12 @@ ; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test154: @@ -7215,23 +7233,24 @@ ; ; SSE4-LABEL: test155: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm8 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm0, %xmm7 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4 +; SSE4-NEXT: pcmpgtq %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm5, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 ; SSE4-NEXT: movdqa %xmm6, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm2, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: movdqa %xmm8, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 ; SSE4-NEXT: movapd %xmm5, %xmm1 ; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test155: @@ -7335,35 +7354,38 @@ ; ; SSE4-LABEL: test156: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm4, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm5, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm4, %xmm5 +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: pxor %xmm7, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm4 +; SSE4-NEXT: pxor %xmm7, %xmm4 ; SSE4-NEXT: movdqa %xmm1, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 -; SSE4-NEXT: movdqa %xmm6, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm4, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm1 +; SSE4-NEXT: pxor %xmm7, %xmm1 ; SSE4-NEXT: movdqa %xmm2, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 -; SSE4-NEXT: movdqa %xmm7, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm3, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 -; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm3, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 +; SSE4-NEXT: movapd %xmm5, %xmm0 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test156: @@ -7481,36 +7503,39 @@ ; ; SSE4-LABEL: test159: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm0, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test159: @@ -7628,36 +7653,39 @@ ; ; SSE4-LABEL: test160: ; SSE4: # %bb.0: # %entry -; SSE4-NEXT: movdqa %xmm0, %xmm9 -; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm0, %xmm10 -; SSE4-NEXT: pxor %xmm8, %xmm10 +; SSE4-NEXT: movdqa %xmm7, %xmm8 +; SSE4-NEXT: movdqa %xmm6, %xmm9 +; SSE4-NEXT: movdqa %xmm5, %xmm10 +; SSE4-NEXT: movdqa %xmm0, %xmm5 +; SSE4-NEXT: movdqa {{.*#+}} xmm7 = [9223372036854775808,9223372036854775808] +; SSE4-NEXT: movdqa %xmm0, %xmm6 +; SSE4-NEXT: pxor %xmm7, %xmm6 ; SSE4-NEXT: movdqa %xmm4, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm10, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4 -; SSE4-NEXT: movdqa %xmm1, %xmm9 -; SSE4-NEXT: pxor %xmm8, %xmm9 -; SSE4-NEXT: movdqa %xmm5, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pcmpgtq %xmm9, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm6, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm5, %xmm4 +; SSE4-NEXT: movdqa %xmm1, %xmm5 +; SSE4-NEXT: pxor %xmm7, %xmm5 +; SSE4-NEXT: movdqa %xmm10, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pcmpgtq %xmm5, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm10 ; SSE4-NEXT: movdqa %xmm2, %xmm1 -; SSE4-NEXT: pxor %xmm8, %xmm1 -; SSE4-NEXT: movdqa %xmm6, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm1 +; SSE4-NEXT: movdqa %xmm9, %xmm0 +; SSE4-NEXT: pxor %xmm7, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm9 ; SSE4-NEXT: movdqa %xmm3, %xmm0 -; SSE4-NEXT: pxor %xmm8, %xmm0 -; SSE4-NEXT: pxor %xmm7, %xmm8 -; SSE4-NEXT: pcmpgtq %xmm0, %xmm8 -; SSE4-NEXT: movdqa %xmm8, %xmm0 -; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7 +; SSE4-NEXT: pxor %xmm7, %xmm0 +; SSE4-NEXT: pxor %xmm8, %xmm7 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm7 +; SSE4-NEXT: movdqa %xmm7, %xmm0 +; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm8 ; SSE4-NEXT: movapd %xmm4, %xmm0 -; SSE4-NEXT: movapd %xmm5, %xmm1 -; SSE4-NEXT: movapd %xmm6, %xmm2 -; SSE4-NEXT: movapd %xmm7, %xmm3 +; SSE4-NEXT: movapd %xmm10, %xmm1 +; SSE4-NEXT: movapd %xmm9, %xmm2 +; SSE4-NEXT: movapd %xmm8, %xmm3 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test160: Index: test/CodeGen/X86/vselect.ll =================================================================== --- test/CodeGen/X86/vselect.ll +++ test/CodeGen/X86/vselect.ll @@ -457,6 +457,7 @@ define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) { ; SSE-LABEL: select_illegal: ; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6 @@ -469,13 +470,12 @@ ; SSE-NEXT: movaps %xmm2, 32(%rdi) ; SSE-NEXT: movaps %xmm1, 16(%rdi) ; SSE-NEXT: movaps %xmm0, (%rdi) -; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: select_illegal: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: vmovaps %ymm7, %ymm3 +; AVX-NEXT: vmovaps %ymm6, %ymm2 ; AVX-NEXT: retq %sel = select <16 x i1> , <16 x double> %a, <16 x double> %b ret <16 x double> %sel Index: test/CodeGen/X86/widen_bitops-0.ll =================================================================== --- test/CodeGen/X86/widen_bitops-0.ll +++ test/CodeGen/X86/widen_bitops-0.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v3i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v3i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v3i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i24_as_v8i3: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i24_as_v8i3: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i24_as_v8i3: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> Index: test/CodeGen/X86/widen_bitops-1.ll =================================================================== --- test/CodeGen/X86/widen_bitops-1.ll +++ test/CodeGen/X86/widen_bitops-1.ll @@ -15,8 +15,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v4i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -34,8 +34,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v4i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -53,8 +53,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v4i8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -76,8 +76,8 @@ ; ; X64-SSE-LABEL: and_i32_as_v8i4: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: andl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -95,8 +95,8 @@ ; ; X64-SSE-LABEL: xor_i32_as_v8i4: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: xorl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -114,8 +114,8 @@ ; ; X64-SSE-LABEL: or_i32_as_v8i4: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: orl %esi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> Index: test/CodeGen/X86/widen_load-2.ll =================================================================== --- test/CodeGen/X86/widen_load-2.ll +++ test/CodeGen/X86/widen_load-2.ll @@ -21,11 +21,11 @@ ; ; X64-LABEL: add3i32: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: paddd (%rdx), %xmm0 ; X64-NEXT: pextrd $2, %xmm0, 8(%rdi) ; X64-NEXT: movq %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 16 %b = load %i32vec3, %i32vec3* %bp, align 16 @@ -54,6 +54,7 @@ ; ; X64-LABEL: add3i32_2: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: pinsrd $2, 8(%rsi), %xmm0 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero @@ -61,7 +62,6 @@ ; X64-NEXT: paddd %xmm0, %xmm1 ; X64-NEXT: pextrd $2, %xmm1, 8(%rdi) ; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i32vec3, %i32vec3* %ap, align 8 %b = load %i32vec3, %i32vec3* %bp, align 8 @@ -89,6 +89,7 @@ ; ; X64-LABEL: add7i32: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddd (%rdx), %xmm0 @@ -96,7 +97,6 @@ ; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) ; X64-NEXT: movq %xmm1, 16(%rdi) ; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i32vec7, %i32vec7* %ap, align 16 %b = load %i32vec7, %i32vec7* %bp, align 16 @@ -125,6 +125,7 @@ ; ; X64-LABEL: add12i32: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 @@ -134,7 +135,6 @@ ; X64-NEXT: movdqa %xmm2, 32(%rdi) ; X64-NEXT: movdqa %xmm1, 16(%rdi) ; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i32vec12, %i32vec12* %ap, align 16 %b = load %i32vec12, %i32vec12* %bp, align 16 @@ -171,13 +171,13 @@ ; ; X64-LABEL: add3i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; X64-NEXT: paddd %xmm0, %xmm1 ; X64-NEXT: pextrw $4, %xmm1, 4(%rdi) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; X64-NEXT: movd %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i16vec3, %i16vec3* %ap, align 16 %b = load %i16vec3, %i16vec3* %bp, align 16 @@ -201,11 +201,11 @@ ; ; X64-LABEL: add4i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: paddw %xmm0, %xmm1 ; X64-NEXT: movq %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i16vec4, %i16vec4* %ap, align 16 %b = load %i16vec4, %i16vec4* %bp, align 16 @@ -232,13 +232,13 @@ ; ; X64-LABEL: add12i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddw (%rdx), %xmm0 ; X64-NEXT: paddw 16(%rdx), %xmm1 ; X64-NEXT: movq %xmm1, 16(%rdi) ; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i16vec12, %i16vec12* %ap, align 16 %b = load %i16vec12, %i16vec12* %bp, align 16 @@ -267,6 +267,7 @@ ; ; X64-LABEL: add18i16: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: movdqa 32(%rsi), %xmm2 @@ -276,7 +277,6 @@ ; X64-NEXT: movd %xmm2, 32(%rdi) ; X64-NEXT: movdqa %xmm1, 16(%rdi) ; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i16vec18, %i16vec18* %ap, align 16 %b = load %i16vec18, %i16vec18* %bp, align 16 @@ -305,13 +305,13 @@ ; ; X64-LABEL: add3i8: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: paddd %xmm0, %xmm1 ; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) ; X64-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; X64-NEXT: pextrw $0, %xmm1, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i8vec3, %i8vec3* %ap, align 16 %b = load %i8vec3, %i8vec3* %bp, align 16 @@ -341,6 +341,7 @@ ; ; X64-LABEL: add31i8: ; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movdqa (%rsi), %xmm0 ; X64-NEXT: movdqa 16(%rsi), %xmm1 ; X64-NEXT: paddb (%rdx), %xmm0 @@ -350,7 +351,6 @@ ; X64-NEXT: pextrd $2, %xmm1, 24(%rdi) ; X64-NEXT: movq %xmm1, 16(%rdi) ; X64-NEXT: movdqa %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq %a = load %i8vec31, %i8vec31* %ap, align 16 %b = load %i8vec31, %i8vec31* %bp, align 16 @@ -384,6 +384,7 @@ ; ; X64-LABEL: rot: ; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movb $-98, 2(%rsi) ; X64-NEXT: movw $-24930, (%rsi) # imm = 0x9E9E ; X64-NEXT: movb $1, 2(%rdx) @@ -395,7 +396,6 @@ ; X64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; X64-NEXT: pextrb $8, %xmm1, 2(%rdi) ; X64-NEXT: pextrw $0, %xmm0, (%rdi) -; X64-NEXT: movq %rdi, %rax ; X64-NEXT: retq entry: %storetmp = bitcast %i8vec3pack* %X to <3 x i8>* Index: test/CodeGen/X86/widen_load-3.ll =================================================================== --- test/CodeGen/X86/widen_load-3.ll +++ test/CodeGen/X86/widen_load-3.ll @@ -41,26 +41,26 @@ ; ; X64-SSE-LABEL: load7_aligned: ; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movaps (%rsi), %xmm0 ; X64-SSE-NEXT: movaps 16(%rsi), %xmm1 ; X64-SSE-NEXT: movaps 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) +; X64-SSE-NEXT: movq 48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rdi) ; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) ; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) ; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_aligned: ; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovaps (%rsi), %ymm0 ; X64-AVX-NEXT: vmovaps 32(%rsi), %ymm1 ; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) ; X64-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0 ; X64-AVX-NEXT: vmovlps %xmm0, 48(%rdi) ; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) -; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x @@ -101,26 +101,26 @@ ; ; X64-SSE-LABEL: load7_unaligned: ; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: movups (%rsi), %xmm0 ; X64-SSE-NEXT: movups 16(%rsi), %xmm1 ; X64-SSE-NEXT: movups 32(%rsi), %xmm2 -; X64-SSE-NEXT: movq 48(%rsi), %rax -; X64-SSE-NEXT: movq %rax, 48(%rdi) +; X64-SSE-NEXT: movq 48(%rsi), %rcx +; X64-SSE-NEXT: movq %rcx, 48(%rdi) ; X64-SSE-NEXT: movaps %xmm2, 32(%rdi) ; X64-SSE-NEXT: movaps %xmm1, 16(%rdi) ; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: movq %rdi, %rax ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: load7_unaligned: ; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vmovups (%rsi), %ymm0 ; X64-AVX-NEXT: vmovups 32(%rsi), %xmm1 -; X64-AVX-NEXT: movq 48(%rsi), %rax -; X64-AVX-NEXT: movq %rax, 48(%rdi) +; X64-AVX-NEXT: movq 48(%rsi), %rcx +; X64-AVX-NEXT: movq %rcx, 48(%rdi) ; X64-AVX-NEXT: vmovaps %xmm1, 32(%rdi) ; X64-AVX-NEXT: vmovaps %ymm0, (%rdi) -; X64-AVX-NEXT: movq %rdi, %rax ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq %x1 = load <7 x i64>, <7 x i64>* %x, align 1 Index: test/CodeGen/X86/win64_vararg.ll =================================================================== --- test/CodeGen/X86/win64_vararg.ll +++ test/CodeGen/X86/win64_vararg.ll @@ -121,10 +121,10 @@ } ; CHECK-LABEL: sret_arg: ; CHECK: pushq +; CHECK: movq %rcx, %rax ; CHECK-DAG: movq %r9, 40(%rsp) ; CHECK-DAG: movq %r8, 32(%rsp) ; CHECK: movl 32(%rsp), %[[tmp:[^ ]*]] -; CHECK: movl %[[tmp]], (%[[sret:[^ ]*]]) -; CHECK: movq %[[sret]], %rax +; CHECK: movl %[[tmp]], (%rax) ; CHECK: popq ; CHECK: retq Index: test/CodeGen/X86/x64-cet-intrinsics.ll =================================================================== --- test/CodeGen/X86/x64-cet-intrinsics.ll +++ test/CodeGen/X86/x64-cet-intrinsics.ll @@ -30,8 +30,8 @@ define i32 @test_rdsspd(i32 %a) { ; CHECK-LABEL: test_rdsspd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: rdsspd %edi ; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: rdsspd %eax ; CHECK-NEXT: retq entry: %0 = call i32 @llvm.x86.rdsspd(i32 %a) @@ -43,8 +43,8 @@ define i64 @test_rdsspq(i64 %a) { ; CHECK-LABEL: test_rdsspq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: rdsspq %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: rdsspq %rax ; CHECK-NEXT: retq entry: %0 = call i64 @llvm.x86.rdsspq(i64 %a) Index: test/CodeGen/X86/x86-64-bittest-logic.ll =================================================================== --- test/CodeGen/X86/x86-64-bittest-logic.ll +++ test/CodeGen/X86/x86-64-bittest-logic.ll @@ -124,8 +124,8 @@ define i64 @and1_optsize(i64 %x) optsize { ; CHECK-LABEL: and1_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $31, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $31, %rax ; CHECK-NEXT: retq %a = and i64 %x, 18446744071562067967 ; clear bit 31 ret i64 %a @@ -134,8 +134,8 @@ define i64 @and2_optsize(i64 %x) optsize { ; CHECK-LABEL: and2_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $32, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $32, %rax ; CHECK-NEXT: retq %a = and i64 %x, 18446744069414584319 ; clear bit 32 ret i64 %a @@ -144,8 +144,8 @@ define i64 @and3_optsize(i64 %x) optsize { ; CHECK-LABEL: and3_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $62, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $62, %rax ; CHECK-NEXT: retq %a = and i64 %x, 13835058055282163711 ; clear bit 62 ret i64 %a @@ -154,8 +154,8 @@ define i64 @and4_optsize(i64 %x) optsize { ; CHECK-LABEL: and4_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btrq $63, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btrq $63, %rax ; CHECK-NEXT: retq %a = and i64 %x, 9223372036854775807 ; clear bit 63 ret i64 %a @@ -164,8 +164,8 @@ define i64 @or1_optsize(i64 %x) optsize { ; CHECK-LABEL: or1_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $31, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $31, %rax ; CHECK-NEXT: retq %a = or i64 %x, 2147483648 ; set bit 31 ret i64 %a @@ -174,8 +174,8 @@ define i64 @or2_optsize(i64 %x) optsize { ; CHECK-LABEL: or2_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $32, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $32, %rax ; CHECK-NEXT: retq %a = or i64 %x, 4294967296 ; set bit 32 ret i64 %a @@ -184,8 +184,8 @@ define i64 @or3_optsize(i64 %x) optsize { ; CHECK-LABEL: or3_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $62, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $62, %rax ; CHECK-NEXT: retq %a = or i64 %x, 4611686018427387904 ; set bit 62 ret i64 %a @@ -194,8 +194,8 @@ define i64 @or4_optsize(i64 %x) optsize { ; CHECK-LABEL: or4_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btsq $63, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btsq $63, %rax ; CHECK-NEXT: retq %a = or i64 %x, 9223372036854775808 ; set bit 63 ret i64 %a @@ -204,8 +204,8 @@ define i64 @xor1_optsize(i64 %x) optsize { ; CHECK-LABEL: xor1_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $31, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $31, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 2147483648 ; toggle bit 31 ret i64 %a @@ -214,8 +214,8 @@ define i64 @xor2_optsize(i64 %x) optsize { ; CHECK-LABEL: xor2_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $32, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $32, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 4294967296 ; toggle bit 32 ret i64 %a @@ -224,8 +224,8 @@ define i64 @xor3_optsize(i64 %x) optsize { ; CHECK-LABEL: xor3_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $62, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $62, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 4611686018427387904 ; toggle bit 62 ret i64 %a @@ -234,8 +234,8 @@ define i64 @xor4_optsize(i64 %x) optsize { ; CHECK-LABEL: xor4_optsize: ; CHECK: # %bb.0: -; CHECK-NEXT: btcq $63, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: btcq $63, %rax ; CHECK-NEXT: retq %a = xor i64 %x, 9223372036854775808 ; toggle bit 63 ret i64 %a Index: test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll =================================================================== --- test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll +++ test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll @@ -14,8 +14,8 @@ define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 { ; CHECK-LABEL: _Z8lshift10mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shldq $10, %rsi, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shldq $10, %rsi, %rax ; CHECK-NEXT: retq entry: %shl = shl i64 %a, 10 @@ -40,8 +40,8 @@ define i64 @_Z8lshift11mm(i64 %a, i64 %b) #1 { ; CHECK-LABEL: _Z8lshift11mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: shldq $11, %rsi, %rdi ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shldq $11, %rsi, %rax ; CHECK-NEXT: retq entry: %shl = shl i64 %a, 11 Index: test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- test/CodeGen/X86/x86-cmov-converter.ll +++ test/CodeGen/X86/x86-cmov-converter.ll @@ -336,14 +336,14 @@ ; CHECK-LABEL: test_cmov_memoperand: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %load = load i32, i32* %y %z = select i1 %cond, i32 %x, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%rcx), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], % ret i32 %z } @@ -353,6 +353,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %y = load i32, i32* %y.ptr %z1 = select i1 %cond, i32 %x, i32 %a @@ -362,17 +363,16 @@ ; CHECK: ja [[FALSE_BB:.*]] ; CHECK-DAG: movl %{{.*}}, %[[R1:.*]] ; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]] -; CHECK-DAG: movl %{{.*}} %[[R3:.*]] +; CHECK-DAG: movl %{{.*}} %eax ; CHECK: [[FALSE_BB]]: ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -384,6 +384,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edx, %eax ; CHECK: cmpl %y = load i32, i32* %y.ptr %z2 = select i1 %cond, i32 %a, i32 %x @@ -393,17 +394,16 @@ ; CHECK: jbe [[FALSE_BB:.*]] ; CHECK-DAG: movl %{{.*}}, %[[R1:.*]] ; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]] -; CHECK-DAG: movl %{{.*}} %[[R3:.*]] +; CHECK-DAG: movl %{{.*}} %eax ; CHECK: [[FALSE_BB]]: ; CHECK: addl ; CHECK-DAG: %[[R1]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] +; CHECK-DAG: %eax ; CHECK-DAG: addl ; CHECK-DAG: %[[R2]] ; CHECK-DAG: , -; CHECK-DAG: %[[R3]] -; CHECK: movl %[[R3]], %eax +; CHECK-DAG: %eax ; CHECK: retq %s1 = add i32 %z1, %z2 %s2 = add i32 %s1, %z3 @@ -434,15 +434,15 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %load = load i32, i32* %p %z = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %z } @@ -453,6 +453,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %load1 = load i32*, i32** %y %p = select i1 %cond, i32* %x, i32* %load1 @@ -461,9 +462,8 @@ ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] ; CHECK: movq (%r{{..}}), %[[R1:.*]] -; CHECK: movl (%[[R1]]), %[[R2:.*]] +; CHECK: movl (%[[R1]]), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R2]], %eax ; CHECK: retq ret i32 %z } @@ -475,6 +475,7 @@ ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3: entry: %cond = icmp ugt i32 %a, %b +; CHECK: movl %edi, %eax ; CHECK: cmpl %p = select i1 %cond, i32* %x, i32* %y %p2 = select i1 %cond, i32* %z, i32* %p @@ -482,9 +483,8 @@ %r = select i1 %cond, i32 %a, i32 %load ; CHECK-NOT: cmov ; CHECK: ja [[FALSE_BB:.*]] -; CHECK: movl (%r{{..}}), %[[R:.*]] +; CHECK: movl (%r{{..}}), %eax ; CHECK: [[FALSE_BB]]: -; CHECK: movl %[[R]], %eax ; CHECK: retq ret i32 %r } Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ test/CodeGen/X86/x86-shrink-wrapping.ll @@ -83,9 +83,7 @@ ; DISABLE: testl %edi, %edi ; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]] ; -; SUM is in %esi because it is coalesced with the second -; argument on the else path. -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; Next BB. @@ -99,23 +97,22 @@ ; SUM << 3. ; CHECK: shll $3, [[SUM]] ; -; Jump to epilogue. -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: movl %esi, %eax +; DISABLE: addl %esi, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: movl %esi, %eax +; ENABLE: addl %esi, %eax ; ENABLE-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { entry: @@ -210,7 +207,7 @@ ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -222,22 +219,22 @@ ; CHECK: nop ; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: movl %esi, %eax +; DISABLE: addl %esi, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: movl %esi, %eax +; ENABLE: addl %esi, %eax ; ENABLE-NEXT: retq define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { entry: @@ -286,7 +283,7 @@ ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: nop -; CHECK: xorl [[SUM:%esi]], [[SUM]] +; CHECK: xorl [[SUM:%eax]], [[SUM]] ; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body @@ -297,23 +294,23 @@ ; Next BB. ; CHECK: shll $3, [[SUM]] ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: movl %esi, %eax +; DISABLE: addl %esi, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: movl %esi, %eax +; ENABLE: addl %esi, %eax ; ENABLE-NEXT: retq define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { entry: @@ -379,24 +376,24 @@ ; CHECK-NEXT: jne [[LOOP_LABEL]] ; Next BB. ; CHECK: nop -; CHECK: xorl %esi, %esi +; CHECK: xorl %eax, %eax ; -; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]] +; DISABLE: popq +; DISABLE: retq ; ; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; DISABLE: addl %esi, %esi -; DISABLE: [[EPILOG_BB]]: ## %if.end +; Shift second argument by one in returned register. +; DISABLE: movl %esi, %eax +; DISABLE: addl %esi, %eax ; ; Epilogue code. ; CHECK-DAG: popq %rbx -; CHECK-DAG: movl %esi, %eax ; CHECK: retq ; ; ENABLE: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; ENABLE: addl %esi, %esi -; ENABLE-NEXT: movl %esi, %eax +; ENABLE: movl %esi, %eax +; ENABLE: addl %esi, %eax ; ENABLE-NEXT: retq define i32 @inlineAsm(i32 %cond, i32 %N) { entry: Index: test/CodeGen/X86/xaluo.ll =================================================================== --- test/CodeGen/X86/xaluo.ll +++ test/CodeGen/X86/xaluo.ll @@ -719,26 +719,26 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: saddoselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: addl %esi, %eax -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: addl %esi, %ecx +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: addl %esi, %eax -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: addl %esi, %ecx +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: saddoselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: addl %esi, %eax -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: addl %esi, %ecx +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -749,26 +749,26 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: saddoselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: addq %rsi, %rax -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: addq %rsi, %rcx +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: saddoselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: addq %rsi, %rax -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: addq %rsi, %rcx +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: saddoselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: addq %rsi, %rax -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: addq %rsi, %rcx +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -779,26 +779,26 @@ define i32 @uaddoselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: uaddoselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: addl %esi, %eax -; SDAG-NEXT: cmovbl %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: addl %esi, %ecx +; SDAG-NEXT: cmovbl %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: uaddoselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: addl %esi, %eax -; FAST-NEXT: cmovbl %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: addl %esi, %ecx +; FAST-NEXT: cmovbl %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: uaddoselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: addl %esi, %eax -; KNL-NEXT: cmovbl %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: addl %esi, %ecx +; KNL-NEXT: cmovbl %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -809,26 +809,26 @@ define i64 @uaddoselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: uaddoselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: addq %rsi, %rax -; SDAG-NEXT: cmovbq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: addq %rsi, %rcx +; SDAG-NEXT: cmovbq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: uaddoselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: addq %rsi, %rax -; FAST-NEXT: cmovbq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: addq %rsi, %rcx +; FAST-NEXT: cmovbq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: uaddoselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: addq %rsi, %rax -; KNL-NEXT: cmovbq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: addq %rsi, %rcx +; KNL-NEXT: cmovbq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -839,23 +839,23 @@ define i32 @ssuboselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: ssuboselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpl %esi, %edi -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: cmpl %esi, %edi +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: ssuboselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: cmpl %esi, %edi -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: cmpl %esi, %edi +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: ssuboselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -866,23 +866,23 @@ define i64 @ssuboselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: ssuboselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpq %rsi, %rdi -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: cmpq %rsi, %rdi +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: ssuboselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: cmpq %rsi, %rdi -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: cmpq %rsi, %rdi +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: ssuboselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: cmpq %rsi, %rdi -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: cmpq %rsi, %rdi +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -893,23 +893,23 @@ define i32 @usuboselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: usuboselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpl %esi, %edi -; SDAG-NEXT: cmovbl %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: cmpl %esi, %edi +; SDAG-NEXT: cmovbl %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: cmpl %esi, %edi -; FAST-NEXT: cmovbl %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: cmpl %esi, %edi +; FAST-NEXT: cmovbl %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: usuboselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: cmovbl %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: cmovbl %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -920,23 +920,23 @@ define i64 @usuboselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: usuboselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: cmpq %rsi, %rdi -; SDAG-NEXT: cmovbq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: cmpq %rsi, %rdi +; SDAG-NEXT: cmovbq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: cmpq %rsi, %rdi -; FAST-NEXT: cmovbq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: cmpq %rsi, %rdi +; FAST-NEXT: cmovbq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: usuboselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: cmpq %rsi, %rdi -; KNL-NEXT: cmovbq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: cmpq %rsi, %rdi +; KNL-NEXT: cmovbq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1372,23 +1372,23 @@ define {i64, i1} @usuboovf(i64 %a, i64 %b) { ; SDAG-LABEL: usuboovf: ; SDAG: ## %bb.0: -; SDAG-NEXT: notq %rsi -; SDAG-NEXT: xorl %edx, %edx ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: notq %rax +; SDAG-NEXT: xorl %edx, %edx ; SDAG-NEXT: retq ; ; FAST-LABEL: usuboovf: ; FAST: ## %bb.0: -; FAST-NEXT: notq %rsi -; FAST-NEXT: xorl %edx, %edx ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: notq %rax +; FAST-NEXT: xorl %edx, %edx ; FAST-NEXT: retq ; ; KNL-LABEL: usuboovf: ; KNL: ## %bb.0: -; KNL-NEXT: notq %rsi -; KNL-NEXT: xorl %edx, %edx ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: notq %rax +; KNL-NEXT: xorl %edx, %edx ; KNL-NEXT: retq %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %a) %v0 = extractvalue {i64, i1} %t0, 0 Index: test/CodeGen/X86/xchg-nofold.ll =================================================================== --- test/CodeGen/X86/xchg-nofold.ll +++ test/CodeGen/X86/xchg-nofold.ll @@ -9,20 +9,21 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(%"struct.std::atomic"* nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind { ; CHECK-LABEL: _Z3fooRSt6atomicIbEb: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq $3, %rax -; CHECK-NEXT: movb 2147450880(%rax), %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: shrq $3, %rcx +; CHECK-NEXT: movb 2147450880(%rcx), %cl +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: andl $7, %ecx -; CHECK-NEXT: cmpb %al, %cl +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $7, %edx +; CHECK-NEXT: cmpb %cl, %dl ; CHECK-NEXT: jge .LBB0_2 ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: xchgb %al, (%rdi) -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xchgb %cl, (%rdi) +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: pushq %rax Index: test/CodeGen/X86/xmulo.ll =================================================================== --- test/CodeGen/X86/xmulo.ll +++ test/CodeGen/X86/xmulo.ll @@ -92,6 +92,7 @@ ; SDAG-LABEL: smuloi8: ; SDAG: ## %bb.0: ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: def $al killed $al killed $eax ; SDAG-NEXT: imulb %sil ; SDAG-NEXT: seto %cl ; SDAG-NEXT: movb %al, (%rdx) @@ -101,6 +102,7 @@ ; FAST-LABEL: smuloi8: ; FAST: ## %bb.0: ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: def $al killed $al killed $eax ; FAST-NEXT: imulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -111,6 +113,7 @@ ; KNL-LABEL: smuloi8: ; KNL: ## %bb.0: ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: imulb %sil ; KNL-NEXT: seto %cl ; KNL-NEXT: movb %al, (%rdx) @@ -218,6 +221,7 @@ ; SDAG-LABEL: umuloi8: ; SDAG: ## %bb.0: ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: def $al killed $al killed $eax ; SDAG-NEXT: mulb %sil ; SDAG-NEXT: seto %cl ; SDAG-NEXT: movb %al, (%rdx) @@ -227,6 +231,7 @@ ; FAST-LABEL: umuloi8: ; FAST: ## %bb.0: ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: def $al killed $al killed $eax ; FAST-NEXT: mulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -237,6 +242,7 @@ ; KNL-LABEL: umuloi8: ; KNL: ## %bb.0: ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: def $al killed $al killed $eax ; KNL-NEXT: mulb %sil ; KNL-NEXT: seto %cl ; KNL-NEXT: movb %al, (%rdx) @@ -254,6 +260,7 @@ ; SDAG: ## %bb.0: ; SDAG-NEXT: movq %rdx, %rcx ; SDAG-NEXT: movl %edi, %eax +; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax ; SDAG-NEXT: mulw %si ; SDAG-NEXT: seto %dl ; SDAG-NEXT: movw %ax, (%rcx) @@ -264,6 +271,7 @@ ; FAST: ## %bb.0: ; FAST-NEXT: movq %rdx, %rcx ; FAST-NEXT: movl %edi, %eax +; FAST-NEXT: ## kill: def $ax killed $ax killed $eax ; FAST-NEXT: mulw %si ; FAST-NEXT: seto %dl ; FAST-NEXT: movw %ax, (%rcx) @@ -275,6 +283,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdx, %rcx ; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: mulw %si ; KNL-NEXT: seto %dl ; KNL-NEXT: movw %ax, (%rcx) @@ -369,26 +378,26 @@ define i32 @smuloselecti32(i32 %v1, i32 %v2) { ; SDAG-LABEL: smuloselecti32: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl %edi, %eax -; SDAG-NEXT: imull %esi, %eax -; SDAG-NEXT: cmovol %edi, %esi ; SDAG-NEXT: movl %esi, %eax +; SDAG-NEXT: movl %edi, %ecx +; SDAG-NEXT: imull %esi, %ecx +; SDAG-NEXT: cmovol %edi, %eax ; SDAG-NEXT: retq ; ; FAST-LABEL: smuloselecti32: ; FAST: ## %bb.0: -; FAST-NEXT: movl %edi, %eax -; FAST-NEXT: imull %esi, %eax -; FAST-NEXT: cmovol %edi, %esi ; FAST-NEXT: movl %esi, %eax +; FAST-NEXT: movl %edi, %ecx +; FAST-NEXT: imull %esi, %ecx +; FAST-NEXT: cmovol %edi, %eax ; FAST-NEXT: retq ; ; KNL-LABEL: smuloselecti32: ; KNL: ## %bb.0: -; KNL-NEXT: movl %edi, %eax -; KNL-NEXT: imull %esi, %eax -; KNL-NEXT: cmovol %edi, %esi ; KNL-NEXT: movl %esi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: imull %esi, %ecx +; KNL-NEXT: cmovol %edi, %eax ; KNL-NEXT: retq %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -399,26 +408,26 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { ; SDAG-LABEL: smuloselecti64: ; SDAG: ## %bb.0: -; SDAG-NEXT: movq %rdi, %rax -; SDAG-NEXT: imulq %rsi, %rax -; SDAG-NEXT: cmovoq %rdi, %rsi ; SDAG-NEXT: movq %rsi, %rax +; SDAG-NEXT: movq %rdi, %rcx +; SDAG-NEXT: imulq %rsi, %rcx +; SDAG-NEXT: cmovoq %rdi, %rax ; SDAG-NEXT: retq ; ; FAST-LABEL: smuloselecti64: ; FAST: ## %bb.0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: imulq %rsi, %rax -; FAST-NEXT: cmovoq %rdi, %rsi ; FAST-NEXT: movq %rsi, %rax +; FAST-NEXT: movq %rdi, %rcx +; FAST-NEXT: imulq %rsi, %rcx +; FAST-NEXT: cmovoq %rdi, %rax ; FAST-NEXT: retq ; ; KNL-LABEL: smuloselecti64: ; KNL: ## %bb.0: -; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: imulq %rsi, %rax -; KNL-NEXT: cmovoq %rdi, %rsi ; KNL-NEXT: movq %rsi, %rax +; KNL-NEXT: movq %rdi, %rcx +; KNL-NEXT: imulq %rsi, %rcx +; KNL-NEXT: cmovoq %rdi, %rax ; KNL-NEXT: retq %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -694,8 +703,8 @@ define i1 @bug27873(i64 %c1, i1 %c2) { ; SDAG-LABEL: bug27873: ; SDAG: ## %bb.0: -; SDAG-NEXT: movl $160, %ecx ; SDAG-NEXT: movq %rdi, %rax +; SDAG-NEXT: movl $160, %ecx ; SDAG-NEXT: mulq %rcx ; SDAG-NEXT: seto %al ; SDAG-NEXT: orb %sil, %al @@ -703,8 +712,8 @@ ; ; FAST-LABEL: bug27873: ; FAST: ## %bb.0: -; FAST-NEXT: movl $160, %ecx ; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: movl $160, %ecx ; FAST-NEXT: mulq %rcx ; FAST-NEXT: seto %al ; FAST-NEXT: orb %sil, %al @@ -712,8 +721,8 @@ ; ; KNL-LABEL: bug27873: ; KNL: ## %bb.0: -; KNL-NEXT: movl $160, %ecx ; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movl $160, %ecx ; KNL-NEXT: mulq %rcx ; KNL-NEXT: seto %al ; KNL-NEXT: orb %sil, %al Index: test/CodeGen/X86/xor.ll =================================================================== --- test/CodeGen/X86/xor.ll +++ test/CodeGen/X86/xor.ll @@ -44,18 +44,18 @@ ; ; X64-LIN-LABEL: test3: ; X64-LIN: # %bb.0: # %entry -; X64-LIN-NEXT: notl %esi -; X64-LIN-NEXT: andl %edi, %esi -; X64-LIN-NEXT: shrl %esi ; X64-LIN-NEXT: movl %esi, %eax +; X64-LIN-NEXT: notl %eax +; X64-LIN-NEXT: andl %edi, %eax +; X64-LIN-NEXT: shrl %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test3: ; X64-WIN: # %bb.0: # %entry -; X64-WIN-NEXT: notl %edx -; X64-WIN-NEXT: andl %ecx, %edx -; X64-WIN-NEXT: shrl %edx ; X64-WIN-NEXT: movl %edx, %eax +; X64-WIN-NEXT: notl %eax +; X64-WIN-NEXT: andl %ecx, %eax +; X64-WIN-NEXT: shrl %eax ; X64-WIN-NEXT: retq entry: %tmp1not = xor i32 %b, -2 @@ -84,34 +84,34 @@ ; ; X64-LIN-LABEL: test4: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB3_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorl %esi, %edi -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: notl %eax -; X64-LIN-NEXT: andl %esi, %eax -; X64-LIN-NEXT: addl %eax, %eax -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorl %esi, %eax +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notl %ecx +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB3_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test4: ; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB3_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorl %edx, %ecx -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: notl %eax -; X64-WIN-NEXT: andl %edx, %eax -; X64-WIN-NEXT: addl %eax, %eax -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorl %edx, %eax +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notl %ecx +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB3_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -150,38 +150,39 @@ ; ; X64-LIN-LABEL: test5: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB4_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorl %esi, %edi -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: notl %eax -; X64-LIN-NEXT: andl %esi, %eax -; X64-LIN-NEXT: addl %eax, %eax -; X64-LIN-NEXT: testw %ax, %ax -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorl %esi, %eax +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notl %ecx +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: testw %cx, %cx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB4_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: # kill: def $ax killed $ax killed $eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test5: ; X64-WIN: # %bb.0: # %entry ; X64-WIN-NEXT: # kill: def $dx killed $dx def $edx -; X64-WIN-NEXT: # kill: def $cx killed $cx def $ecx +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB4_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorl %edx, %ecx -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: notl %eax -; X64-WIN-NEXT: andl %edx, %eax -; X64-WIN-NEXT: addl %eax, %eax -; X64-WIN-NEXT: testw %ax, %ax -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorl %edx, %eax +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notl %ecx +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: testw %cx, %cx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB4_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: # kill: def $ax killed $ax killed $eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -218,34 +219,35 @@ ; ; X64-LIN-LABEL: test6: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB5_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorb %sil, %dil -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: notb %al -; X64-LIN-NEXT: andb %sil, %al -; X64-LIN-NEXT: addb %al, %al -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorb %sil, %al +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: notb %cl +; X64-LIN-NEXT: andb %sil, %cl +; X64-LIN-NEXT: addb %cl, %cl +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB5_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: # kill: def $al killed $al killed $eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test6: ; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB5_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorb %dl, %cl -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: notb %al -; X64-WIN-NEXT: andb %dl, %al -; X64-WIN-NEXT: addb %al, %al -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorb %dl, %al +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: notb %cl +; X64-WIN-NEXT: andb %dl, %cl +; X64-WIN-NEXT: addb %cl, %cl +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB5_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -282,34 +284,34 @@ ; ; X64-LIN-LABEL: test7: ; X64-LIN: # %bb.0: # %entry +; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB6_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-LIN-NEXT: xorl %esi, %edi -; X64-LIN-NEXT: movl %edi, %eax -; X64-LIN-NEXT: xorl $2147483646, %eax # imm = 0x7FFFFFFE -; X64-LIN-NEXT: andl %esi, %eax -; X64-LIN-NEXT: addl %eax, %eax -; X64-LIN-NEXT: movl %eax, %esi +; X64-LIN-NEXT: xorl %esi, %eax +; X64-LIN-NEXT: movl %eax, %ecx +; X64-LIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE +; X64-LIN-NEXT: andl %esi, %ecx +; X64-LIN-NEXT: addl %ecx, %ecx +; X64-LIN-NEXT: movl %ecx, %esi ; X64-LIN-NEXT: jne .LBB6_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 -; X64-LIN-NEXT: movl %edi, %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test7: ; X64-WIN: # %bb.0: # %entry +; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB6_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-WIN-NEXT: xorl %edx, %ecx -; X64-WIN-NEXT: movl %ecx, %eax -; X64-WIN-NEXT: xorl $2147483646, %eax # imm = 0x7FFFFFFE -; X64-WIN-NEXT: andl %edx, %eax -; X64-WIN-NEXT: addl %eax, %eax -; X64-WIN-NEXT: movl %eax, %edx +; X64-WIN-NEXT: xorl %edx, %eax +; X64-WIN-NEXT: movl %eax, %ecx +; X64-WIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE +; X64-WIN-NEXT: andl %edx, %ecx +; X64-WIN-NEXT: addl %ecx, %ecx +; X64-WIN-NEXT: movl %ecx, %edx ; X64-WIN-NEXT: jne .LBB6_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 -; X64-WIN-NEXT: movl %ecx, %eax ; X64-WIN-NEXT: retq entry: br label %bb @@ -336,14 +338,14 @@ ; ; X64-LIN-LABEL: test8: ; X64-LIN: # %bb.0: # %entry -; X64-LIN-NEXT: notl %edi ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: notl %eax ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test8: ; X64-WIN: # %bb.0: # %entry -; X64-WIN-NEXT: notl %ecx ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: notl %eax ; X64-WIN-NEXT: retq entry: %t1 = sub i32 0, %a @@ -361,16 +363,16 @@ ; ; X64-LIN-LABEL: test9: ; X64-LIN: # %bb.0: -; X64-LIN-NEXT: notl %edi -; X64-LIN-NEXT: andl $4096, %edi # imm = 0x1000 ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: notl %eax +; X64-LIN-NEXT: andl $4096, %eax # imm = 0x1000 ; X64-LIN-NEXT: retq ; ; X64-WIN-LABEL: test9: ; X64-WIN: # %bb.0: -; X64-WIN-NEXT: notl %ecx -; X64-WIN-NEXT: andl $4096, %ecx # imm = 0x1000 ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: notl %eax +; X64-WIN-NEXT: andl $4096, %eax # imm = 0x1000 ; X64-WIN-NEXT: retq %1 = and i32 %a, 4096 %2 = xor i32 %1, 4096 @@ -459,8 +461,9 @@ ; ; X64-LIN-LABEL: test11: ; X64-LIN: # %bb.0: -; X64-LIN-NEXT: movl $-2, %eax ; X64-LIN-NEXT: movl %edi, %ecx +; X64-LIN-NEXT: movl $-2, %eax +; X64-LIN-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-LIN-NEXT: roll %cl, %eax ; X64-LIN-NEXT: retq ; Index: test/DebugInfo/COFF/pieces.ll =================================================================== --- test/DebugInfo/COFF/pieces.ll +++ test/DebugInfo/COFF/pieces.ll @@ -65,15 +65,15 @@ ; ASM-LABEL: pad_right: # @pad_right -; ASM: #DEBUG_VALUE: pad_right:o <- [DW_OP_LLVM_fragment 32 32] $ecx -; ASM: movl %ecx, %eax +; ASM: movq %rcx, %rax +; ASM: #DEBUG_VALUE: pad_right:o <- [DW_OP_LLVM_fragment 32 32] $eax ; ASM: retq ; ASM-LABEL: pad_left: # @pad_left -; ASM: #DEBUG_VALUE: pad_left:o <- [DW_OP_LLVM_fragment 0 32] $ecx ; ASM: .cv_loc 2 1 24 3 # t.c:24:3 -; ASM: movl %ecx, %eax +; ASM: movq %rcx, %rax +; ASM: #DEBUG_VALUE: pad_left:o <- [DW_OP_LLVM_fragment 0 32] $eax ; ASM: retq @@ -136,7 +136,7 @@ ; ASM: .asciz "pad_right" # Function name ; ASM: .short 4414 # Record kind: S_LOCAL ; ASM: .asciz "o" -; ASM: .cv_def_range .Lfunc_begin1 .Lfunc_end1, "C\021\022\000\000\000\004\000\000\000" +; ASM: .cv_def_range .Lfunc_begin1 .Ltmp8, "C\021\021\000\000\000\004\000\000\000" ; OBJ-LABEL: {{.*}}Proc{{.*}}Sym { ; OBJ: Kind: S_GPROC32_ID (0x1147) @@ -146,7 +146,7 @@ ; OBJ: VarName: o ; OBJ: } ; OBJ: DefRangeSubfieldRegisterSym { -; OBJ: Register: CVRegECX (0x12) +; OBJ: Register: CVRegEAX (0x11) ; OBJ: MayHaveNoName: 0 ; OBJ: OffsetInParent: 4 ; OBJ: LocalVariableAddrRange { @@ -159,7 +159,7 @@ ; ASM: .asciz "pad_left" # Function name ; ASM: .short 4414 # Record kind: S_LOCAL ; ASM: .asciz "o" -; ASM: .cv_def_range .Lfunc_begin2 .Lfunc_end2, "C\021\022\000\000\000\000\000\000\000" +; ASM: .cv_def_range .Lfunc_begin2 .Ltmp10, "C\021\021\000\000\000\000\000\000\000" ; OBJ-LABEL: {{.*}}Proc{{.*}}Sym { ; OBJ: Kind: S_GPROC32_ID (0x1147) @@ -169,7 +169,7 @@ ; OBJ: VarName: o ; OBJ: } ; OBJ: DefRangeSubfieldRegisterSym { -; OBJ: Register: CVRegECX (0x12) +; OBJ: Register: CVRegEAX (0x11) ; OBJ: MayHaveNoName: 0 ; OBJ: OffsetInParent: 0 ; OBJ: LocalVariableAddrRange { Index: test/DebugInfo/X86/live-debug-values.ll =================================================================== --- test/DebugInfo/X86/live-debug-values.ll +++ test/DebugInfo/X86/live-debug-values.ll @@ -33,7 +33,7 @@ ; CHECK-NEXT: #DEBUG_VALUE: main:n <- $ebx ; Other register values have been clobbered. ; CHECK-NOT: #DEBUG_VALUE: -; CHECK: movl %ecx, m(%rip) +; CHECK: movl %esi, m(%rip) ; ModuleID = 'LiveDebugValues.c' source_filename = "test/DebugInfo/X86/live-debug-values.ll" Index: test/DebugInfo/X86/live-debug-variables.ll =================================================================== --- test/DebugInfo/X86/live-debug-variables.ll +++ test/DebugInfo/X86/live-debug-variables.ll @@ -25,7 +25,7 @@ ; CHECK: .debug_loc contents: ; CHECK-NEXT: 0x00000000: ; We currently emit an entry for the function prologue, too, which could be optimized away. -; CHECK: [0x000000000000001f, 0x000000000000003c): DW_OP_reg3 RBX +; CHECK: [0x0000000000000018, 0x0000000000000072): DW_OP_reg3 RBX ; We should only have one entry inside the function. ; CHECK-NOT: : Index: test/DebugInfo/X86/pieces-3.ll =================================================================== --- test/DebugInfo/X86/pieces-3.ll +++ test/DebugInfo/X86/pieces-3.ll @@ -17,11 +17,12 @@ ; ; CHECK: DW_TAG_formal_parameter [3] ; CHECK-NEXT: DW_AT_location [DW_FORM_data4] ( -; CHECK-NEXT: [0x0000000000000000, 0x0000000000000004): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4 -; CHECK-NEXT: [0x0000000000000004, 0x0000000000000008): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4) +; CHECK-NEXT: [0x0000000000000000, 0x0000000000000007): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg4 RSI, DW_OP_piece 0x4 +; CHECK-NEXT: [0x0000000000000007, 0x0000000000000007): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_piece 0x4, DW_OP_reg0 RAX, DW_OP_piece 0x4) ; CHECK-NEXT: DW_AT_name {{.*}}"outer" ; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_location {{.*}}(DW_OP_reg4 RSI, DW_OP_piece 0x4) +; CHECK-NEXT: DW_AT_location [DW_FORM_data4] (0x00000044 +; CHECK-NEXT: [0x0000000000000007, 0x0000000000000007): DW_OP_reg0 RAX, DW_OP_piece 0x4) ; CHECK-NEXT: "i1" ; ModuleID = '/Volumes/Data/llvm/test/DebugInfo/X86/sroasplit-2.ll'