diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6342,19 +6342,8 @@
     SDValue N0Op0 = N0.getOperand(0);
     APInt Mask = ~N1C->getAPIntValue();
     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
-    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
-      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
-                                 N0.getValueType(), N0Op0);
-
-      // Replace uses of the AND with uses of the Zero extend node.
-      CombineTo(N, Zext);
-
-      // We actually want to replace all uses of the any_extend with the
-      // zero_extend, to avoid duplicating things. This will later cause this
-      // AND to be folded.
-      CombineTo(N0.getNode(), Zext);
-      return SDValue(N, 0); // Return N so it doesn't get rechecked!
-    }
+    if (DAG.MaskedValueIsZero(N0Op0, Mask))
+      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0);
   }
 
   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
--- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -800,52 +800,52 @@
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    pushq %rbx
 ; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
-; AVX-NEXT:    movq %rdx, %rax
-; AVX-NEXT:    shrq $56, %rax
+; AVX-NEXT:    movq %rcx, %rax
+; AVX-NEXT:    shrq $48, %rax
 ; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    movq %rdx, %rcx
-; AVX-NEXT:    shrq $48, %rcx
-; AVX-NEXT:    andl $15, %ecx
-; AVX-NEXT:    movq %rdx, %rsi
-; AVX-NEXT:    shrq $40, %rsi
+; AVX-NEXT:    movq %rcx, %rdx
+; AVX-NEXT:    shrq $40, %rdx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    movq %rcx, %rsi
+; AVX-NEXT:    shrq $32, %rsi
 ; AVX-NEXT:    andl $15, %esi
-; AVX-NEXT:    movq %rdx, %r8
-; AVX-NEXT:    shrq $32, %r8
-; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %r9
-; AVX-NEXT:    shrq $56, %r9
+; AVX-NEXT:    shrq $48, %r9
 ; AVX-NEXT:    andl $15, %r9d
 ; AVX-NEXT:    movq %rdi, %r10
-; AVX-NEXT:    shrq $48, %r10
+; AVX-NEXT:    shrq $40, %r10
 ; AVX-NEXT:    andl $15, %r10d
 ; AVX-NEXT:    movq %rdi, %r11
-; AVX-NEXT:    shrq $40, %r11
+; AVX-NEXT:    shrq $32, %r11
 ; AVX-NEXT:    andl $15, %r11d
+; AVX-NEXT:    movq %rcx, %r8
+; AVX-NEXT:    shrq $56, %r8
+; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %rbx
-; AVX-NEXT:    shrq $32, %rbx
+; AVX-NEXT:    shrq $56, %rbx
 ; AVX-NEXT:    andl $15, %ebx
-; AVX-NEXT:    shlq $32, %rbx
+; AVX-NEXT:    shlq $32, %r11
 ; AVX-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %rbx, %rdi
-; AVX-NEXT:    shlq $40, %r11
-; AVX-NEXT:    orq %rdi, %r11
-; AVX-NEXT:    shlq $48, %r10
-; AVX-NEXT:    orq %r11, %r10
-; AVX-NEXT:    shlq $56, %r9
+; AVX-NEXT:    orq %r11, %rdi
+; AVX-NEXT:    shlq $40, %r10
+; AVX-NEXT:    orq %rdi, %r10
+; AVX-NEXT:    shlq $48, %r9
 ; AVX-NEXT:    orq %r10, %r9
-; AVX-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    shlq $32, %r8
-; AVX-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %r8, %rdx
-; AVX-NEXT:    shlq $40, %rsi
-; AVX-NEXT:    orq %rdx, %rsi
-; AVX-NEXT:    shlq $48, %rcx
+; AVX-NEXT:    shlq $56, %rbx
+; AVX-NEXT:    orq %r9, %rbx
+; AVX-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $32, %rsi
+; AVX-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
 ; AVX-NEXT:    orq %rsi, %rcx
-; AVX-NEXT:    shlq $56, %rax
-; AVX-NEXT:    orq %rcx, %rax
-; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $40, %rdx
+; AVX-NEXT:    orq %rcx, %rdx
+; AVX-NEXT:    shlq $48, %rax
+; AVX-NEXT:    orq %rdx, %rax
+; AVX-NEXT:    shlq $56, %r8
+; AVX-NEXT:    orq %rax, %r8
+; AVX-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
 ; AVX-NEXT:    popq %rbx
 ; AVX-NEXT:    retq
@@ -982,52 +982,52 @@
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    pushq %rbx
 ; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
-; AVX-NEXT:    movq %rdx, %rax
-; AVX-NEXT:    shrq $56, %rax
+; AVX-NEXT:    movq %rcx, %rax
+; AVX-NEXT:    shrq $48, %rax
 ; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    movq %rdx, %rcx
-; AVX-NEXT:    shrq $48, %rcx
-; AVX-NEXT:    andl $15, %ecx
-; AVX-NEXT:    movq %rdx, %rsi
-; AVX-NEXT:    shrq $40, %rsi
+; AVX-NEXT:    movq %rcx, %rdx
+; AVX-NEXT:    shrq $40, %rdx
+; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    movq %rcx, %rsi
+; AVX-NEXT:    shrq $32, %rsi
 ; AVX-NEXT:    andl $15, %esi
-; AVX-NEXT:    movq %rdx, %r8
-; AVX-NEXT:    shrq $32, %r8
-; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %r9
-; AVX-NEXT:    shrq $56, %r9
+; AVX-NEXT:    shrq $48, %r9
 ; AVX-NEXT:    andl $15, %r9d
 ; AVX-NEXT:    movq %rdi, %r10
-; AVX-NEXT:    shrq $48, %r10
+; AVX-NEXT:    shrq $40, %r10
 ; AVX-NEXT:    andl $15, %r10d
 ; AVX-NEXT:    movq %rdi, %r11
-; AVX-NEXT:    shrq $40, %r11
+; AVX-NEXT:    shrq $32, %r11
 ; AVX-NEXT:    andl $15, %r11d
+; AVX-NEXT:    movq %rcx, %r8
+; AVX-NEXT:    shrq $56, %r8
+; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %rdi, %rbx
-; AVX-NEXT:    shrq $32, %rbx
+; AVX-NEXT:    shrq $56, %rbx
 ; AVX-NEXT:    andl $15, %ebx
-; AVX-NEXT:    shlq $32, %rbx
+; AVX-NEXT:    shlq $32, %r11
 ; AVX-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %rbx, %rdi
-; AVX-NEXT:    shlq $40, %r11
-; AVX-NEXT:    orq %rdi, %r11
-; AVX-NEXT:    shlq $48, %r10
-; AVX-NEXT:    orq %r11, %r10
-; AVX-NEXT:    shlq $56, %r9
+; AVX-NEXT:    orq %r11, %rdi
+; AVX-NEXT:    shlq $40, %r10
+; AVX-NEXT:    orq %rdi, %r10
+; AVX-NEXT:    shlq $48, %r9
 ; AVX-NEXT:    orq %r10, %r9
-; AVX-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    shlq $32, %r8
-; AVX-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %r8, %rdx
-; AVX-NEXT:    shlq $40, %rsi
-; AVX-NEXT:    orq %rdx, %rsi
-; AVX-NEXT:    shlq $48, %rcx
+; AVX-NEXT:    shlq $56, %rbx
+; AVX-NEXT:    orq %r9, %rbx
+; AVX-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $32, %rsi
+; AVX-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
 ; AVX-NEXT:    orq %rsi, %rcx
-; AVX-NEXT:    shlq $56, %rax
-; AVX-NEXT:    orq %rcx, %rax
-; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shlq $40, %rdx
+; AVX-NEXT:    orq %rcx, %rdx
+; AVX-NEXT:    shlq $48, %rax
+; AVX-NEXT:    orq %rdx, %rax
+; AVX-NEXT:    shlq $56, %r8
+; AVX-NEXT:    orq %rax, %r8
+; AVX-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    vinsertf128 $0, -{{[0-9]+}}(%rsp), %ymm0, %ymm0
 ; AVX-NEXT:    popq %rbx
 ; AVX-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
--- a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
@@ -114,12 +114,8 @@
 ;
 ; X64-LABEL: i56_or:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl 6(%rdi), %eax
-; X64-NEXT:    shll $16, %eax
-; X64-NEXT:    movzwl 4(%rdi), %ecx
-; X64-NEXT:    movw %cx, 4(%rdi)
-; X64-NEXT:    shrq $16, %rax
-; X64-NEXT:    movb %al, 6(%rdi)
+; X64-NEXT:    movzwl 4(%rdi), %eax
+; X64-NEXT:    movw %ax, 4(%rdi)
 ; X64-NEXT:    orl $384, (%rdi) # imm = 0x180
 ; X64-NEXT:    retq
   %aa = load i56, ptr %a, align 1
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -679,6 +679,8 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vmovq %xmm0, %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
 ; X64-NEXT:    retq
   %3 = fcmp oeq double %0, %1
   %4 = sext i1 %3 to i64
diff --git a/llvm/test/CodeGen/X86/ushl_sat.ll b/llvm/test/CodeGen/X86/ushl_sat.ll
--- a/llvm/test/CodeGen/X86/ushl_sat.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat.ll
@@ -14,10 +14,9 @@
 ; X64-LABEL: func:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %edx
-; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    movl %edi, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movzwl %dx, %eax
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    cmpw %ax, %di
@@ -33,8 +32,7 @@
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movzwl %dx, %esi
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %ax
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
@@ -54,8 +52,7 @@
 ; X64-NEXT:    addl %eax, %eax
 ; X64-NEXT:    movl %eax, %edx
 ; X64-NEXT:    shll %cl, %edx
-; X64-NEXT:    movzwl %dx, %edx
-; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    movzwl %dx, %esi
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %esi
 ; X64-NEXT:    cmpw %si, %ax
@@ -74,8 +71,7 @@
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movzwl %dx, %esi
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %ax
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
@@ -100,8 +96,7 @@
 ; X64-NEXT:    addl %edi, %edi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    movzwl %ax, %edx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %edx
 ; X64-NEXT:    cmpw %dx, %di
@@ -121,8 +116,7 @@
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movzwl %dx, %esi
 ; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %ax
diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
--- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
@@ -300,100 +300,91 @@
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    shll %cl, %edi
-; X86-NEXT:    movzwl %di, %ebx
-; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movzwl %bx, %edi
 ; X86-NEXT:    shrl %cl, %edi
 ; X86-NEXT:    cmpw %di, %ax
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; X86-NEXT:    cmovnel %eax, %ebx
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    movb %ch, %cl
-; X86-NEXT:    shll %cl, %edi
-; X86-NEXT:    movzwl %di, %eax
-; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzwl %ax, %edi
 ; X86-NEXT:    shrl %cl, %edi
 ; X86-NEXT:    cmpw %di, %si
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl $65535, %edi # imm = 0xFFFF
-; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    movl $65535, %esi # imm = 0xFFFF
+; X86-NEXT:    cmovnel %esi, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shll %cl, %esi
-; X86-NEXT:    movzwl %si, %eax
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    shrl %cl, %esi
-; X86-NEXT:    cmpw %si, %dx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzwl %ax, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    cmpw %dx, %bp
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    cmovnel %esi, %eax
 ; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT:    movl $65535, %esi # imm = 0xFFFF
-; X86-NEXT:    movl %ebp, %eax
-; X86-NEXT:    movl %ebp, %edx
-; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %ebp
-; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    shll %cl, %ebp
+; X86-NEXT:    movzwl %bp, %edx
 ; X86-NEXT:    shrl %cl, %edx
 ; X86-NEXT:    cmpw %dx, %ax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovnel %esi, %ebp
-; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shll %cl, %esi
-; X86-NEXT:    movzwl %si, %ebx
-; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movzwl %bx, %esi
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    cmpw %si, %dx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    cmovnel %eax, %ebx
+; X86-NEXT:    movl $65535, %esi # imm = 0xFFFF
+; X86-NEXT:    cmovnel %esi, %ebx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shll %cl, %esi
-; X86-NEXT:    movzwl %si, %edi
-; X86-NEXT:    movl %edi, %esi
-; X86-NEXT:    shrl %cl, %esi
-; X86-NEXT:    cmpw %si, %dx
-; X86-NEXT:    cmovnel %eax, %edi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movzwl %di, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    cmpw %ax, %dx
+; X86-NEXT:    cmovnel %esi, %edi
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    shrl %cl, %edx
-; X86-NEXT:    cmpw %dx, %ax
-; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; X86-NEXT:    cmovnel %eax, %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shll %cl, %edx
-; X86-NEXT:    movzwl %dx, %edx
-; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movzwl %dx, %eax
 ; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    cmpw %ax, %si
 ; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
 ; X86-NEXT:    cmovnel %eax, %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movw %dx, 14(%eax)
-; X86-NEXT:    movw %si, 12(%eax)
-; X86-NEXT:    movw %di, 10(%eax)
-; X86-NEXT:    movw %bx, 8(%eax)
-; X86-NEXT:    movw %bp, 6(%eax)
-; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT:    movw %cx, 4(%eax)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    movw %cx, 2(%eax)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    movw %cx, (%eax)
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzwl %ax, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpw %si, %cx
+; X86-NEXT:    movl $65535, %ecx # imm = 0xFFFF
+; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movw %ax, 14(%ecx)
+; X86-NEXT:    movw %dx, 12(%ecx)
+; X86-NEXT:    movw %di, 10(%ecx)
+; X86-NEXT:    movw %bx, 8(%ecx)
+; X86-NEXT:    movw %bp, 6(%ecx)
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movw %ax, 4(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movw %ax, 2(%ecx)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movw %ax, (%ecx)
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi