Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41219,14 +41219,35 @@ Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass) - : &X86::GR64RegClass; - if (RC->contains(DestReg)) - Res = std::make_pair(DestReg, RC); - } else { - // No register found/type mismatch. - Res.first = 0; - Res.second = nullptr; + : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr) + : nullptr; + if (Size==64 && !is64Bit) { + //Model GCC's behavior here and select a fixed pair of 32-bit registers. + switch (Res.first) { + case X86::EAX: + return std::make_pair(X86::EAX, &X86::GR32_ADRegClass); + case X86::EDX: + return std::make_pair(X86::EDX, &X86::GR32_DCRegClass); + case X86::ECX: + return std::make_pair(X86::ECX, &X86::GR32_CBRegClass); + case X86::EBX: + return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass); + case X86::ESI: + return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass); + case X86::EDI: + return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass); + case X86::EBP: + return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass); + default: + return std::make_pair(0, nullptr); + } + } + if (RC && RC->contains(DestReg)) + return std::make_pair(DestReg, RC); + return Res; } + // No register found/type mismatch. + return std::make_pair(0, nullptr); } else if (isFRClass(*Class)) { // Handle references to XMM physical registers that got mapped into the // wrong class. This can happen with constraints like {xmm0} where the Index: llvm/lib/Target/X86/X86RegisterInfo.td =================================================================== --- llvm/lib/Target/X86/X86RegisterInfo.td +++ llvm/lib/Target/X86/X86RegisterInfo.td @@ -499,6 +499,16 @@ def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>; def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>; +// Classes to support the 64-bit assembler constraint tied to a fixed +// register in 32-bit mode. The second register is always the next in +// the list. Wrap around causes an error. +def GR32_DC : RegisterClass<"X86", [i32], 32, (add EDX, ECX)>; +def GR32_CB : RegisterClass<"X86", [i32], 32, (add ECX, EBX)>; +def GR32_BSI : RegisterClass<"X86", [i32], 32, (add EBX, ESI)>; +def GR32_SIDI : RegisterClass<"X86", [i32], 32, (add ESI, EDI)>; +def GR32_DIBP : RegisterClass<"X86", [i32], 32, (add EDI, EBP)>; +def GR32_BPSP : RegisterClass<"X86", [i32], 32, (add EBP, ESP)>; + // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; Index: llvm/test/CodeGen/X86/atomic_mi.ll =================================================================== --- llvm/test/CodeGen/X86/atomic_mi.ll +++ llvm/test/CodeGen/X86/atomic_mi.ll @@ -2245,11 +2245,11 @@ ; X32-NEXT: .cfi_offset %edi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: movl 20(%ebp), %esi -; X32-NEXT: movl 8(%ebp), %edi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: movl 8(%ebp), %edi ; X32-NEXT: lock cmpxchg8b (%edi,%esi,8) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) Index: llvm/test/CodeGen/X86/avx512-regcall-Mask.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-regcall-Mask.ll +++ llvm/test/CodeGen/X86/avx512-regcall-Mask.ll @@ -129,9 +129,9 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $48, %rsp ; WIN64-NEXT: .seh_stackalloc 48 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 32 -; WIN64-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 16 ; WIN64-NEXT: .seh_endprologue ; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002 @@ -139,7 +139,6 @@ ; WIN64-NEXT: movq %rax, %rcx ; WIN64-NEXT: movq %rax, %rdx ; WIN64-NEXT: movq %rax, %rdi -; WIN64-NEXT: movq %rax, %rsi ; WIN64-NEXT: movq %rax, %r8 ; WIN64-NEXT: movq %rax, %r9 ; WIN64-NEXT: movq %rax, %r10 @@ -147,9 +146,10 @@ ; WIN64-NEXT: movq %rax, %r12 ; WIN64-NEXT: movq %rax, %r14 ; WIN64-NEXT: movq %rax, %r15 +; WIN64-NEXT: movq %rax, %rsi ; WIN64-NEXT: callq test_argv64i1 -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $48, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -181,13 +181,13 @@ ; LINUXOSX64-NEXT: movq %rax, %rcx ; LINUXOSX64-NEXT: movq %rax, %rdx ; LINUXOSX64-NEXT: movq %rax, %rdi -; LINUXOSX64-NEXT: movq %rax, %rsi ; LINUXOSX64-NEXT: movq %rax, %r8 ; LINUXOSX64-NEXT: movq %rax, %r9 ; LINUXOSX64-NEXT: movq %rax, %r12 ; LINUXOSX64-NEXT: movq %rax, %r13 ; LINUXOSX64-NEXT: movq %rax, %r14 ; LINUXOSX64-NEXT: movq %rax, %r15 +; LINUXOSX64-NEXT: movq %rax, %rsi ; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8 ; LINUXOSX64-NEXT: pushq %rax @@ -249,7 +249,7 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -258,7 +258,7 @@ ; WIN64-NEXT: kmovq %rax, %k0 ; WIN64-NEXT: vpmovm2b %k0, %zmm0 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -289,9 +289,9 @@ ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esp ; X32-NEXT: subl $72, %esp -; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: kmovd %edx, %k0 ; X32-NEXT: kmovd %ecx, %k1 @@ -304,9 +304,9 @@ ; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2 ; X32-NEXT: calll _test_argv32i1helper ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload ; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esp ; X32-NEXT: vzeroupper @@ -349,13 +349,13 @@ ; LINUXOSX64-NEXT: pushq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: subq $128, %rsp -; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 @@ -378,13 +378,13 @@ ; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2 ; LINUXOSX64-NEXT: callq test_argv32i1helper ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload ; LINUXOSX64-NEXT: addq $128, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: popq %rsp @@ -414,7 +414,7 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -424,7 +424,7 @@ ; WIN64-NEXT: movl $1, %edx ; WIN64-NEXT: callq test_argv32i1 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -481,7 +481,7 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -489,7 +489,7 @@ ; WIN64-NEXT: callq test_retv32i1 ; WIN64-NEXT: incl %eax ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -521,9 +521,9 @@ ; X32: # %bb.0: ; X32-NEXT: pushl %esp ; X32-NEXT: subl $72, %esp -; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: kmovd %edx, %k0 ; X32-NEXT: kmovd %ecx, %k1 @@ -537,9 +537,9 @@ ; X32-NEXT: vzeroupper ; X32-NEXT: calll _test_argv16i1helper ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload ; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esp ; X32-NEXT: retl @@ -581,13 +581,13 @@ ; LINUXOSX64-NEXT: pushq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: subq $128, %rsp -; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 @@ -611,13 +611,13 @@ ; LINUXOSX64-NEXT: vzeroupper ; LINUXOSX64-NEXT: callq test_argv16i1helper ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload ; LINUXOSX64-NEXT: addq $128, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: popq %rsp @@ -645,7 +645,7 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -655,7 +655,7 @@ ; WIN64-NEXT: movl $1, %edx ; WIN64-NEXT: callq test_argv16i1 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -714,7 +714,7 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -724,7 +724,7 @@ ; WIN64-NEXT: incl %eax ; WIN64-NEXT: # kill: def $ax killed $ax killed $eax ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -758,9 +758,9 @@ ; X32: # %bb.0: ; X32-NEXT: pushl %esp ; X32-NEXT: subl $72, %esp -; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill -; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill ; X32-NEXT: kmovd %edx, %k0 ; X32-NEXT: kmovd %ecx, %k1 @@ -774,9 +774,9 @@ ; X32-NEXT: vzeroupper ; X32-NEXT: calll _test_argv8i1helper ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload -; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload +; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload ; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esp ; X32-NEXT: retl @@ -818,13 +818,13 @@ ; LINUXOSX64-NEXT: pushq %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: subq $128, %rsp -; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill -; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16 @@ -848,13 +848,13 @@ ; LINUXOSX64-NEXT: vzeroupper ; LINUXOSX64-NEXT: callq test_argv8i1helper ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload -; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload +; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload ; LINUXOSX64-NEXT: addq $128, %rsp ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 ; LINUXOSX64-NEXT: popq %rsp @@ -882,7 +882,7 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -892,7 +892,7 @@ ; WIN64-NEXT: movl $1, %edx ; WIN64-NEXT: callq test_argv8i1 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi @@ -953,7 +953,7 @@ ; WIN64-NEXT: .seh_pushreg 7 ; WIN64-NEXT: subq $40, %rsp ; WIN64-NEXT: .seh_stackalloc 40 -; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill +; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 7, 16 ; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; WIN64-NEXT: .seh_savexmm 6, 0 @@ -964,7 +964,7 @@ ; WIN64-NEXT: vpmovm2w %k0, %zmm0 ; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload -; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload +; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload ; WIN64-NEXT: addq $40, %rsp ; WIN64-NEXT: popq %rdi ; WIN64-NEXT: popq %rsi Index: llvm/test/CodeGen/X86/physreg-pairs-error.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/physreg-pairs-error.ll @@ -0,0 +1,12 @@ +; RUN: not llc -mtriple=i386-unknown-linux-gnu -o - %s 2>&1 | FileCheck %s + +; CHECK: error: couldn't allocate input reg for constraint '{esp}' +define dso_local i64 @test_esp(i64 %in) local_unnamed_addr nounwind { +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{esp},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + Index: llvm/test/CodeGen/X86/physreg-pairs.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/physreg-pairs.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i386-unknown-linux-gnu -o - %s | FileCheck %s + +; To match GCCs behavior in assigning 64-bit values ot a 32-bit +; register we bind to the value to the pair (the given register, the following +; register) in the sequence. EAX, EDX, ECX, EBX, ESI, EDI, EBP, ESP. There +; is no wrapping, so this will fail given ESP. + +define dso_local i64 @test_eax(i64 %in) local_unnamed_addr nounwind { +; CHECK-LABEL: test_eax: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $-1985229329, %eax # imm = 0x89ABCDEF +; CHECK-NEXT: movl $19088743, %edx # imm = 0x1234567 +; CHECK-NEXT: #APP +; CHECK-NEXT: movl %eax, %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: retl +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{eax},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + +define dso_local i64 @test_edx(i64 %in) local_unnamed_addr nounwind { +; CHECK-LABEL: test_edx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $-1985229329, %edx # imm = 0x89ABCDEF +; CHECK-NEXT: movl $19088743, %ecx # imm = 0x1234567 +; CHECK-NEXT: #APP +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: retl +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{edx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + +define dso_local i64 @test_ecx(i64 %in) local_unnamed_addr nounwind { +; CHECK-LABEL: test_ecx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: movl $-1985229329, %ecx # imm = 0x89ABCDEF +; CHECK-NEXT: movl $19088743, %ebx # imm = 0x1234567 +; CHECK-NEXT: #APP +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ecx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + +define dso_local i64 @test_ebx(i64 %in) local_unnamed_addr nounwind { +; CHECK-LABEL: test_ebx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl $-1985229329, %ebx # imm = 0x89ABCDEF +; CHECK-NEXT: movl $19088743, %esi # imm = 0x1234567 +; CHECK-NEXT: #APP +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ebx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + +define dso_local i64 @test_esi(i64 %in) local_unnamed_addr nounwind { +; CHECK-LABEL: test_esi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl $-1985229329, %esi # imm = 0x89ABCDEF +; CHECK-NEXT: movl $19088743, %edi # imm = 0x1234567 +; CHECK-NEXT: #APP +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{esi},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + +define dso_local i64 @test_edi(i64 %in) local_unnamed_addr nounwind { +; CHECK-LABEL: test_edi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: movl $-1985229329, %edi # imm = 0x89ABCDEF +; CHECK-NEXT: movl $19088743, %ebp # imm = 0x1234567 +; CHECK-NEXT: #APP +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{edi},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + +define dso_local i64 @test_ebp(i64 %in) local_unnamed_addr nounwind { +; CHECK-LABEL: test_ebp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl $19088743, %esp # imm = 0x1234567 +; CHECK-NEXT: movl $-1985229329, %ebp # imm = 0x89ABCDEF +; CHECK-NEXT: #APP +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ebp},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895) + %conv = trunc i64 %0 to i32 + %add = add nsw i32 %conv, 3 + %conv1 = sext i32 %add to i64 + ret i64 %conv1 +} + Index: llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll =================================================================== --- llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll +++ llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll @@ -1348,8 +1348,6 @@ ; CHECK-BASELINE-NEXT: movq %rcx, %r15 ; CHECK-BASELINE-NEXT: movq %rsi, %r14 ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-BASELINE-NEXT: movb 15(%rcx), %al -; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 16(%rcx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 17(%rcx), %al @@ -1361,11 +1359,11 @@ ; CHECK-BASELINE-NEXT: movb 20(%rcx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 21(%rcx), %r12b -; CHECK-BASELINE-NEXT: movb 22(%rcx), %r10b -; CHECK-BASELINE-NEXT: movb 23(%rcx), %r11b -; CHECK-BASELINE-NEXT: movb 24(%rcx), %bpl -; CHECK-BASELINE-NEXT: movb 25(%rcx), %r13b -; CHECK-BASELINE-NEXT: movb 26(%rcx), %r9b +; CHECK-BASELINE-NEXT: movb 22(%rcx), %r9b +; CHECK-BASELINE-NEXT: movb 23(%rcx), %r10b +; CHECK-BASELINE-NEXT: movb 24(%rcx), %r11b +; CHECK-BASELINE-NEXT: movb 25(%rcx), %bpl +; CHECK-BASELINE-NEXT: movb 26(%rcx), %r13b ; CHECK-BASELINE-NEXT: movb 27(%rcx), %r8b ; CHECK-BASELINE-NEXT: movb 28(%rcx), %dil ; CHECK-BASELINE-NEXT: movb 29(%rcx), %sil @@ -1402,35 +1400,35 @@ ; CHECK-BASELINE-NEXT: orb %al, %r8b ; CHECK-BASELINE-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 26(%r14), %al -; CHECK-BASELINE-NEXT: andb %r9b, %al -; CHECK-BASELINE-NEXT: notb %r9b -; CHECK-BASELINE-NEXT: andb 26(%rdx), %r9b -; CHECK-BASELINE-NEXT: orb %al, %r9b -; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 25(%r14), %al ; CHECK-BASELINE-NEXT: andb %r13b, %al ; CHECK-BASELINE-NEXT: notb %r13b -; CHECK-BASELINE-NEXT: andb 25(%rdx), %r13b +; CHECK-BASELINE-NEXT: andb 26(%rdx), %r13b ; CHECK-BASELINE-NEXT: orb %al, %r13b ; CHECK-BASELINE-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 24(%r14), %al +; CHECK-BASELINE-NEXT: movb 25(%r14), %al ; CHECK-BASELINE-NEXT: andb %bpl, %al ; CHECK-BASELINE-NEXT: notb %bpl -; CHECK-BASELINE-NEXT: andb 24(%rdx), %bpl +; CHECK-BASELINE-NEXT: andb 25(%rdx), %bpl ; CHECK-BASELINE-NEXT: orb %al, %bpl ; CHECK-BASELINE-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 23(%r14), %al +; CHECK-BASELINE-NEXT: movb 24(%r14), %al ; CHECK-BASELINE-NEXT: andb %r11b, %al ; CHECK-BASELINE-NEXT: notb %r11b -; CHECK-BASELINE-NEXT: andb 23(%rdx), %r11b +; CHECK-BASELINE-NEXT: andb 24(%rdx), %r11b ; CHECK-BASELINE-NEXT: orb %al, %r11b ; CHECK-BASELINE-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 22(%r14), %al +; CHECK-BASELINE-NEXT: movb 23(%r14), %al ; CHECK-BASELINE-NEXT: andb %r10b, %al ; CHECK-BASELINE-NEXT: notb %r10b -; CHECK-BASELINE-NEXT: andb 22(%rdx), %r10b +; CHECK-BASELINE-NEXT: andb 23(%rdx), %r10b ; CHECK-BASELINE-NEXT: orb %al, %r10b ; CHECK-BASELINE-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-BASELINE-NEXT: movb 22(%r14), %al +; CHECK-BASELINE-NEXT: andb %r9b, %al +; CHECK-BASELINE-NEXT: notb %r9b +; CHECK-BASELINE-NEXT: andb 22(%rdx), %r9b +; CHECK-BASELINE-NEXT: orb %al, %r9b +; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 21(%r14), %al ; CHECK-BASELINE-NEXT: andb %r12b, %al ; CHECK-BASELINE-NEXT: notb %r12b @@ -1462,6 +1460,7 @@ ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: andb %cl, %al ; CHECK-BASELINE-NEXT: notb %cl +; CHECK-BASELINE-NEXT: movq %rdx, %rbx ; CHECK-BASELINE-NEXT: andb 17(%rdx), %cl ; CHECK-BASELINE-NEXT: orb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill @@ -1469,12 +1468,11 @@ ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: andb %cl, %al ; CHECK-BASELINE-NEXT: notb %cl -; CHECK-BASELINE-NEXT: movq %rdx, %rbx ; CHECK-BASELINE-NEXT: andb 16(%rdx), %cl ; CHECK-BASELINE-NEXT: orb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-BASELINE-NEXT: movb 15(%r15), %cl ; CHECK-BASELINE-NEXT: movb 15(%r14), %al -; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: andb %cl, %al ; CHECK-BASELINE-NEXT: notb %cl ; CHECK-BASELINE-NEXT: andb 15(%rdx), %cl @@ -1646,8 +1644,6 @@ ; CHECK-SSE1-NEXT: movq %rcx, %r15 ; CHECK-SSE1-NEXT: movq %rsi, %r14 ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-SSE1-NEXT: movb 15(%rcx), %al -; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 16(%rcx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 17(%rcx), %al @@ -1659,11 +1655,11 @@ ; CHECK-SSE1-NEXT: movb 20(%rcx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 21(%rcx), %r12b -; CHECK-SSE1-NEXT: movb 22(%rcx), %r10b -; CHECK-SSE1-NEXT: movb 23(%rcx), %r11b -; CHECK-SSE1-NEXT: movb 24(%rcx), %bpl -; CHECK-SSE1-NEXT: movb 25(%rcx), %r13b -; CHECK-SSE1-NEXT: movb 26(%rcx), %r9b +; CHECK-SSE1-NEXT: movb 22(%rcx), %r9b +; CHECK-SSE1-NEXT: movb 23(%rcx), %r10b +; CHECK-SSE1-NEXT: movb 24(%rcx), %r11b +; CHECK-SSE1-NEXT: movb 25(%rcx), %bpl +; CHECK-SSE1-NEXT: movb 26(%rcx), %r13b ; CHECK-SSE1-NEXT: movb 27(%rcx), %r8b ; CHECK-SSE1-NEXT: movb 28(%rcx), %dil ; CHECK-SSE1-NEXT: movb 29(%rcx), %sil @@ -1700,35 +1696,35 @@ ; CHECK-SSE1-NEXT: orb %al, %r8b ; CHECK-SSE1-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 26(%r14), %al -; CHECK-SSE1-NEXT: andb %r9b, %al -; CHECK-SSE1-NEXT: notb %r9b -; CHECK-SSE1-NEXT: andb 26(%rdx), %r9b -; CHECK-SSE1-NEXT: orb %al, %r9b -; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 25(%r14), %al ; CHECK-SSE1-NEXT: andb %r13b, %al ; CHECK-SSE1-NEXT: notb %r13b -; CHECK-SSE1-NEXT: andb 25(%rdx), %r13b +; CHECK-SSE1-NEXT: andb 26(%rdx), %r13b ; CHECK-SSE1-NEXT: orb %al, %r13b ; CHECK-SSE1-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 24(%r14), %al +; CHECK-SSE1-NEXT: movb 25(%r14), %al ; CHECK-SSE1-NEXT: andb %bpl, %al ; CHECK-SSE1-NEXT: notb %bpl -; CHECK-SSE1-NEXT: andb 24(%rdx), %bpl +; CHECK-SSE1-NEXT: andb 25(%rdx), %bpl ; CHECK-SSE1-NEXT: orb %al, %bpl ; CHECK-SSE1-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 23(%r14), %al +; CHECK-SSE1-NEXT: movb 24(%r14), %al ; CHECK-SSE1-NEXT: andb %r11b, %al ; CHECK-SSE1-NEXT: notb %r11b -; CHECK-SSE1-NEXT: andb 23(%rdx), %r11b +; CHECK-SSE1-NEXT: andb 24(%rdx), %r11b ; CHECK-SSE1-NEXT: orb %al, %r11b ; CHECK-SSE1-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 22(%r14), %al +; CHECK-SSE1-NEXT: movb 23(%r14), %al ; CHECK-SSE1-NEXT: andb %r10b, %al ; CHECK-SSE1-NEXT: notb %r10b -; CHECK-SSE1-NEXT: andb 22(%rdx), %r10b +; CHECK-SSE1-NEXT: andb 23(%rdx), %r10b ; CHECK-SSE1-NEXT: orb %al, %r10b ; CHECK-SSE1-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-SSE1-NEXT: movb 22(%r14), %al +; CHECK-SSE1-NEXT: andb %r9b, %al +; CHECK-SSE1-NEXT: notb %r9b +; CHECK-SSE1-NEXT: andb 22(%rdx), %r9b +; CHECK-SSE1-NEXT: orb %al, %r9b +; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 21(%r14), %al ; CHECK-SSE1-NEXT: andb %r12b, %al ; CHECK-SSE1-NEXT: notb %r12b @@ -1760,6 +1756,7 @@ ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: andb %cl, %al ; CHECK-SSE1-NEXT: notb %cl +; CHECK-SSE1-NEXT: movq %rdx, %rbx ; CHECK-SSE1-NEXT: andb 17(%rdx), %cl ; CHECK-SSE1-NEXT: orb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill @@ -1767,12 +1764,11 @@ ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: andb %cl, %al ; CHECK-SSE1-NEXT: notb %cl -; CHECK-SSE1-NEXT: movq %rdx, %rbx ; CHECK-SSE1-NEXT: andb 16(%rdx), %cl ; CHECK-SSE1-NEXT: orb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-SSE1-NEXT: movb 15(%r15), %cl ; CHECK-SSE1-NEXT: movb 15(%r14), %al -; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: andb %cl, %al ; CHECK-SSE1-NEXT: notb %cl ; CHECK-SSE1-NEXT: andb 15(%rdx), %cl @@ -3527,9 +3523,7 @@ ; CHECK-BASELINE-NEXT: movq %rdx, %r13 ; CHECK-BASELINE-NEXT: movq %rsi, %rbx ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-BASELINE-NEXT: movb 16(%rdx), %r12b -; CHECK-BASELINE-NEXT: movb 15(%rdx), %al -; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-BASELINE-NEXT: movb 15(%rdx), %r12b ; CHECK-BASELINE-NEXT: movb 14(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 13(%rdx), %al @@ -3540,13 +3534,13 @@ ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 10(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 9(%rdx), %r10b -; CHECK-BASELINE-NEXT: movb 8(%rdx), %r11b -; CHECK-BASELINE-NEXT: movb 7(%rdx), %r9b +; CHECK-BASELINE-NEXT: movb 9(%rdx), %r9b +; CHECK-BASELINE-NEXT: movb 8(%rdx), %r10b +; CHECK-BASELINE-NEXT: movb 7(%rdx), %r11b ; CHECK-BASELINE-NEXT: movb 6(%rdx), %r8b ; CHECK-BASELINE-NEXT: movb 5(%rdx), %bpl -; CHECK-BASELINE-NEXT: movb 4(%rdx), %dil -; CHECK-BASELINE-NEXT: movb 3(%rdx), %sil +; CHECK-BASELINE-NEXT: movb 4(%rdx), %sil +; CHECK-BASELINE-NEXT: movb 3(%rdx), %dil ; CHECK-BASELINE-NEXT: movb 2(%rdx), %r14b ; CHECK-BASELINE-NEXT: movb (%rdx), %al ; CHECK-BASELINE-NEXT: movb 1(%rdx), %r15b @@ -3566,14 +3560,14 @@ ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 3(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %sil, %al +; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: andb 3(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %sil, %al +; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 4(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %dil, %al +; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: andb 4(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %dil, %al +; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 5(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al @@ -3586,19 +3580,19 @@ ; CHECK-BASELINE-NEXT: xorb %r8b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 7(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %r9b, %al +; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: andb 7(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %r9b, %al +; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 8(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %r11b, %al +; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: andb 8(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %r11b, %al +; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 9(%rbx), %al -; CHECK-BASELINE-NEXT: xorb %r10b, %al +; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: andb 9(%rcx), %al -; CHECK-BASELINE-NEXT: xorb %r10b, %al +; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 10(%rbx), %dl ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload @@ -3630,17 +3624,17 @@ ; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 15(%rbx), %dl -; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload -; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: andb 15(%rcx), %dl -; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movb 16(%rbx), %al +; CHECK-BASELINE-NEXT: movb 15(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al -; CHECK-BASELINE-NEXT: andb 16(%rcx), %al +; CHECK-BASELINE-NEXT: andb 15(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-BASELINE-NEXT: movb 16(%r13), %al +; CHECK-BASELINE-NEXT: movb 16(%rbx), %dl +; CHECK-BASELINE-NEXT: xorb %al, %dl +; CHECK-BASELINE-NEXT: andb 16(%rcx), %dl +; CHECK-BASELINE-NEXT: xorb %al, %dl +; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 17(%r13), %al ; CHECK-BASELINE-NEXT: movb 17(%rbx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl @@ -3657,18 +3651,18 @@ ; CHECK-BASELINE-NEXT: movb 19(%rbx), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b -; CHECK-BASELINE-NEXT: movq %rcx, %rdx ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: movb 20(%r13), %al -; CHECK-BASELINE-NEXT: movb 20(%rbx), %r14b -; CHECK-BASELINE-NEXT: xorb %al, %r14b -; CHECK-BASELINE-NEXT: andb 20(%rcx), %r14b -; CHECK-BASELINE-NEXT: xorb %al, %r14b -; CHECK-BASELINE-NEXT: movb 21(%r13), %al -; CHECK-BASELINE-NEXT: movb 21(%rbx), %r15b +; CHECK-BASELINE-NEXT: movb 20(%rbx), %r15b ; CHECK-BASELINE-NEXT: xorb %al, %r15b -; CHECK-BASELINE-NEXT: andb 21(%rcx), %r15b +; CHECK-BASELINE-NEXT: andb 20(%rcx), %r15b +; CHECK-BASELINE-NEXT: movq %rcx, %rsi ; CHECK-BASELINE-NEXT: xorb %al, %r15b +; CHECK-BASELINE-NEXT: movb 21(%r13), %al +; CHECK-BASELINE-NEXT: movb 21(%rbx), %r14b +; CHECK-BASELINE-NEXT: xorb %al, %r14b +; CHECK-BASELINE-NEXT: andb 21(%rcx), %r14b +; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: movb 22(%r13), %al ; CHECK-BASELINE-NEXT: movb 22(%rbx), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl @@ -3700,39 +3694,39 @@ ; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: movb 28(%r13), %al -; CHECK-BASELINE-NEXT: movb 28(%rbx), %sil -; CHECK-BASELINE-NEXT: xorb %al, %sil -; CHECK-BASELINE-NEXT: andb 28(%rcx), %sil -; CHECK-BASELINE-NEXT: xorb %al, %sil +; CHECK-BASELINE-NEXT: movb 28(%rbx), %dl +; CHECK-BASELINE-NEXT: xorb %al, %dl +; CHECK-BASELINE-NEXT: andb 28(%rcx), %dl +; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb 29(%r13), %al ; CHECK-BASELINE-NEXT: movb 29(%rbx), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: andb 29(%rdx), %cl +; CHECK-BASELINE-NEXT: andb 29(%rsi), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb 30(%r13), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movb 30(%rbx), %al ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload -; CHECK-BASELINE-NEXT: andb 30(%rdx), %al +; CHECK-BASELINE-NEXT: andb 30(%rsi), %al ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb 31(%r13), %r13b ; CHECK-BASELINE-NEXT: movb 31(%rbx), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl -; CHECK-BASELINE-NEXT: andb 31(%rdx), %bl +; CHECK-BASELINE-NEXT: andb 31(%rsi), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; CHECK-BASELINE-NEXT: movb %bl, 31(%r13) ; CHECK-BASELINE-NEXT: movb %al, 30(%r13) ; CHECK-BASELINE-NEXT: movb %cl, 29(%r13) -; CHECK-BASELINE-NEXT: movb %sil, 28(%r13) +; CHECK-BASELINE-NEXT: movb %dl, 28(%r13) ; CHECK-BASELINE-NEXT: movb %dil, 27(%r13) ; CHECK-BASELINE-NEXT: movb %r8b, 26(%r13) ; CHECK-BASELINE-NEXT: movb %r9b, 25(%r13) ; CHECK-BASELINE-NEXT: movb %r10b, 24(%r13) ; CHECK-BASELINE-NEXT: movb %r11b, 23(%r13) ; CHECK-BASELINE-NEXT: movb %bpl, 22(%r13) -; CHECK-BASELINE-NEXT: movb %r15b, 21(%r13) -; CHECK-BASELINE-NEXT: movb %r14b, 20(%r13) +; CHECK-BASELINE-NEXT: movb %r14b, 21(%r13) +; CHECK-BASELINE-NEXT: movb %r15b, 20(%r13) ; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13) ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 18(%r13) @@ -3792,9 +3786,7 @@ ; CHECK-SSE1-NEXT: movq %rdx, %r13 ; CHECK-SSE1-NEXT: movq %rsi, %rbx ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-SSE1-NEXT: movb 16(%rdx), %r12b -; CHECK-SSE1-NEXT: movb 15(%rdx), %al -; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-SSE1-NEXT: movb 15(%rdx), %r12b ; CHECK-SSE1-NEXT: movb 14(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 13(%rdx), %al @@ -3805,13 +3797,13 @@ ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 10(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 9(%rdx), %r10b -; CHECK-SSE1-NEXT: movb 8(%rdx), %r11b -; CHECK-SSE1-NEXT: movb 7(%rdx), %r9b +; CHECK-SSE1-NEXT: movb 9(%rdx), %r9b +; CHECK-SSE1-NEXT: movb 8(%rdx), %r10b +; CHECK-SSE1-NEXT: movb 7(%rdx), %r11b ; CHECK-SSE1-NEXT: movb 6(%rdx), %r8b ; CHECK-SSE1-NEXT: movb 5(%rdx), %bpl -; CHECK-SSE1-NEXT: movb 4(%rdx), %dil -; CHECK-SSE1-NEXT: movb 3(%rdx), %sil +; CHECK-SSE1-NEXT: movb 4(%rdx), %sil +; CHECK-SSE1-NEXT: movb 3(%rdx), %dil ; CHECK-SSE1-NEXT: movb 2(%rdx), %r14b ; CHECK-SSE1-NEXT: movb (%rdx), %al ; CHECK-SSE1-NEXT: movb 1(%rdx), %r15b @@ -3831,14 +3823,14 @@ ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 3(%rbx), %al -; CHECK-SSE1-NEXT: xorb %sil, %al +; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: andb 3(%rcx), %al -; CHECK-SSE1-NEXT: xorb %sil, %al +; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 4(%rbx), %al -; CHECK-SSE1-NEXT: xorb %dil, %al +; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: andb 4(%rcx), %al -; CHECK-SSE1-NEXT: xorb %dil, %al +; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 5(%rbx), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al @@ -3851,19 +3843,19 @@ ; CHECK-SSE1-NEXT: xorb %r8b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 7(%rbx), %al -; CHECK-SSE1-NEXT: xorb %r9b, %al +; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: andb 7(%rcx), %al -; CHECK-SSE1-NEXT: xorb %r9b, %al +; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 8(%rbx), %al -; CHECK-SSE1-NEXT: xorb %r11b, %al +; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: andb 8(%rcx), %al -; CHECK-SSE1-NEXT: xorb %r11b, %al +; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 9(%rbx), %al -; CHECK-SSE1-NEXT: xorb %r10b, %al +; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: andb 9(%rcx), %al -; CHECK-SSE1-NEXT: xorb %r10b, %al +; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 10(%rbx), %dl ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload @@ -3895,17 +3887,17 @@ ; CHECK-SSE1-NEXT: andb 14(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 15(%rbx), %dl -; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload -; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: andb 15(%rcx), %dl -; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movb 16(%rbx), %al +; CHECK-SSE1-NEXT: movb 15(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al -; CHECK-SSE1-NEXT: andb 16(%rcx), %al +; CHECK-SSE1-NEXT: andb 15(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-SSE1-NEXT: movb 16(%r13), %al +; CHECK-SSE1-NEXT: movb 16(%rbx), %dl +; CHECK-SSE1-NEXT: xorb %al, %dl +; CHECK-SSE1-NEXT: andb 16(%rcx), %dl +; CHECK-SSE1-NEXT: xorb %al, %dl +; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 17(%r13), %al ; CHECK-SSE1-NEXT: movb 17(%rbx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl @@ -3922,18 +3914,18 @@ ; CHECK-SSE1-NEXT: movb 19(%rbx), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b -; CHECK-SSE1-NEXT: movq %rcx, %rdx ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: movb 20(%r13), %al -; CHECK-SSE1-NEXT: movb 20(%rbx), %r14b -; CHECK-SSE1-NEXT: xorb %al, %r14b -; CHECK-SSE1-NEXT: andb 20(%rcx), %r14b -; CHECK-SSE1-NEXT: xorb %al, %r14b -; CHECK-SSE1-NEXT: movb 21(%r13), %al -; CHECK-SSE1-NEXT: movb 21(%rbx), %r15b +; CHECK-SSE1-NEXT: movb 20(%rbx), %r15b ; CHECK-SSE1-NEXT: xorb %al, %r15b -; CHECK-SSE1-NEXT: andb 21(%rcx), %r15b +; CHECK-SSE1-NEXT: andb 20(%rcx), %r15b +; CHECK-SSE1-NEXT: movq %rcx, %rsi ; CHECK-SSE1-NEXT: xorb %al, %r15b +; CHECK-SSE1-NEXT: movb 21(%r13), %al +; CHECK-SSE1-NEXT: movb 21(%rbx), %r14b +; CHECK-SSE1-NEXT: xorb %al, %r14b +; CHECK-SSE1-NEXT: andb 21(%rcx), %r14b +; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: movb 22(%r13), %al ; CHECK-SSE1-NEXT: movb 22(%rbx), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl @@ -3965,39 +3957,39 @@ ; CHECK-SSE1-NEXT: andb 27(%rcx), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: movb 28(%r13), %al -; CHECK-SSE1-NEXT: movb 28(%rbx), %sil -; CHECK-SSE1-NEXT: xorb %al, %sil -; CHECK-SSE1-NEXT: andb 28(%rcx), %sil -; CHECK-SSE1-NEXT: xorb %al, %sil +; CHECK-SSE1-NEXT: movb 28(%rbx), %dl +; CHECK-SSE1-NEXT: xorb %al, %dl +; CHECK-SSE1-NEXT: andb 28(%rcx), %dl +; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb 29(%r13), %al ; CHECK-SSE1-NEXT: movb 29(%rbx), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: andb 29(%rdx), %cl +; CHECK-SSE1-NEXT: andb 29(%rsi), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb 30(%r13), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movb 30(%rbx), %al ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload -; CHECK-SSE1-NEXT: andb 30(%rdx), %al +; CHECK-SSE1-NEXT: andb 30(%rsi), %al ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb 31(%r13), %r13b ; CHECK-SSE1-NEXT: movb 31(%rbx), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl -; CHECK-SSE1-NEXT: andb 31(%rdx), %bl +; CHECK-SSE1-NEXT: andb 31(%rsi), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; CHECK-SSE1-NEXT: movb %bl, 31(%r13) ; CHECK-SSE1-NEXT: movb %al, 30(%r13) ; CHECK-SSE1-NEXT: movb %cl, 29(%r13) -; CHECK-SSE1-NEXT: movb %sil, 28(%r13) +; CHECK-SSE1-NEXT: movb %dl, 28(%r13) ; CHECK-SSE1-NEXT: movb %dil, 27(%r13) ; CHECK-SSE1-NEXT: movb %r8b, 26(%r13) ; CHECK-SSE1-NEXT: movb %r9b, 25(%r13) ; CHECK-SSE1-NEXT: movb %r10b, 24(%r13) ; CHECK-SSE1-NEXT: movb %r11b, 23(%r13) ; CHECK-SSE1-NEXT: movb %bpl, 22(%r13) -; CHECK-SSE1-NEXT: movb %r15b, 21(%r13) -; CHECK-SSE1-NEXT: movb %r14b, 20(%r13) +; CHECK-SSE1-NEXT: movb %r14b, 21(%r13) +; CHECK-SSE1-NEXT: movb %r15b, 20(%r13) ; CHECK-SSE1-NEXT: movb %r12b, 19(%r13) ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 18(%r13)