Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -87,7 +87,7 @@
     cl::Hidden);
 
 static cl::opt<bool> ExperimentalUnorderedISEL(
-    "x86-experimental-unordered-atomic-isel", cl::init(false),
+    "x86-experimental-unordered-atomic-isel", cl::init(true),
     cl::desc("Use LoadSDNode and StoreSDNode instead of "
              "AtomicSDNode for unordered atomic loads and "
              "stores respectively."),
Index: llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
===================================================================
--- llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
+++ llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
@@ -12,23 +12,13 @@
 define void @store_fp128(fp128* %fptr, fp128 %v) {
 ; X64-NOSSE-LABEL: store_fp128:
 ; X64-NOSSE:       # %bb.0:
-; X64-NOSSE-NEXT:    pushq %rax
-; X64-NOSSE-NEXT:    .cfi_def_cfa_offset 16
-; X64-NOSSE-NEXT:    callq __sync_lock_test_and_set_16
-; X64-NOSSE-NEXT:    popq %rax
-; X64-NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; X64-NOSSE-NEXT:    movq %rdx, 8(%rdi)
+; X64-NOSSE-NEXT:    movq %rsi, (%rdi)
 ; X64-NOSSE-NEXT:    retq
 ;
 ; X64-SSE-LABEL: store_fp128:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    subq $24, %rsp
-; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
-; X64-SSE-NEXT:    movaps %xmm0, (%rsp)
-; X64-SSE-NEXT:    movq (%rsp), %rsi
-; X64-SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-SSE-NEXT:    callq __sync_lock_test_and_set_16
-; X64-SSE-NEXT:    addq $24, %rsp
-; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
 ; X64-SSE-NEXT:    retq
   store atomic fp128 %v, fp128* %fptr unordered, align 16
   ret void
Index: llvm/test/CodeGen/X86/atomic-non-integer.ll
===================================================================
--- llvm/test/CodeGen/X86/atomic-non-integer.ll
+++ llvm/test/CodeGen/X86/atomic-non-integer.ll
@@ -114,12 +114,26 @@
 }
 
 define void @store_float(float* %fptr, float %v) {
-; X86-LABEL: store_float:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    retl
+; X86-SSE-LABEL: store_float:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movss %xmm0, (%eax)
+; X86-SSE-NEXT:    retl
+;
+; X86-AVX-LABEL: store_float:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
+; X86-AVX-NEXT:    retl
+;
+; X86-NOSSE-LABEL: store_float:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    fstps (%eax)
+; X86-NOSSE-NEXT:    retl
 ;
 ; X64-SSE-LABEL: store_float:
 ; X64-SSE:       # %bb.0:
@@ -162,16 +176,16 @@
 ;
 ; X86-SSE2-LABEL: store_double:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movsd %xmm0, (%eax)
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: store_double:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    vmovsd %xmm0, (%eax)
 ; X86-AVX-NEXT:    retl
 ;
 ; X86-NOSSE-LABEL: store_double:
@@ -215,87 +229,65 @@
 define void @store_fp128(fp128* %fptr, fp128 %v) {
 ; X86-SSE-LABEL: store_fp128:
 ; X86-SSE:       # %bb.0:
-; X86-SSE-NEXT:    subl $36, %esp
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 36
-; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl %eax
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    calll __sync_lock_test_and_set_16
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
-; X86-SSE-NEXT:    addl $56, %esp
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -56
+; X86-SSE-NEXT:    pushl %edi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
+; X86-SSE-NEXT:    .cfi_offset %esi, -12
+; X86-SSE-NEXT:    .cfi_offset %edi, -8
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SSE-NEXT:    movl %esi, 12(%edi)
+; X86-SSE-NEXT:    movl %edx, 8(%edi)
+; X86-SSE-NEXT:    movl %ecx, 4(%edi)
+; X86-SSE-NEXT:    movl %eax, (%edi)
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    popl %edi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; X86-AVX-LABEL: store_fp128:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    subl $44, %esp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 48
-; X86-AVX-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
+; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT:    movl %eax, (%esp)
-; X86-AVX-NEXT:    calll __sync_lock_test_and_set_16
-; X86-AVX-NEXT:    addl $40, %esp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
+; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
 ; X86-AVX-NEXT:    retl
 ;
 ; X86-NOSSE-LABEL: store_fp128:
 ; X86-NOSSE:       # %bb.0:
-; X86-NOSSE-NEXT:    subl $36, %esp
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 36
-; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl %eax
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    calll __sync_lock_test_and_set_16
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
-; X86-NOSSE-NEXT:    addl $56, %esp
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -56
+; X86-NOSSE-NEXT:    pushl %edi
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT:    pushl %esi
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
+; X86-NOSSE-NEXT:    .cfi_offset %edi, -8
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOSSE-NEXT:    movl %esi, 12(%edi)
+; X86-NOSSE-NEXT:    movl %edx, 8(%edi)
+; X86-NOSSE-NEXT:    movl %ecx, 4(%edi)
+; X86-NOSSE-NEXT:    movl %eax, (%edi)
+; X86-NOSSE-NEXT:    popl %esi
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOSSE-NEXT:    popl %edi
+; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X64-SSE-LABEL: store_fp128:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    subq $24, %rsp
-; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
-; X64-SSE-NEXT:    movaps %xmm0, (%rsp)
-; X64-SSE-NEXT:    movq (%rsp), %rsi
-; X64-SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-SSE-NEXT:    callq __sync_lock_test_and_set_16
-; X64-SSE-NEXT:    addq $24, %rsp
-; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X64-SSE-NEXT:    movaps %xmm0, (%rdi)
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: store_fp128:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    subq $24, %rsp
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
-; X64-AVX-NEXT:    vmovaps %xmm0, (%rsp)
-; X64-AVX-NEXT:    movq (%rsp), %rsi
-; X64-AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-AVX-NEXT:    callq __sync_lock_test_and_set_16
-; X64-AVX-NEXT:    addq $24, %rsp
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
 ; X64-AVX-NEXT:    retq
   store atomic fp128 %v, fp128* %fptr unordered, align 16
   ret void
@@ -383,53 +375,11 @@
 }
 
 define float @load_float(float* %fptr) {
-; X86-SSE1-LABEL: load_float:
-; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    pushl %eax
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl (%eax), %eax
-; X86-SSE1-NEXT:    movl %eax, (%esp)
-; X86-SSE1-NEXT:    flds (%esp)
-; X86-SSE1-NEXT:    popl %eax
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
-; X86-SSE1-NEXT:    retl
-;
-; X86-SSE2-LABEL: load_float:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %eax
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE2-NEXT:    movss %xmm0, (%esp)
-; X86-SSE2-NEXT:    flds (%esp)
-; X86-SSE2-NEXT:    popl %eax
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
-; X86-SSE2-NEXT:    retl
-;
-; X86-AVX-LABEL: load_float:
-; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    pushl %eax
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
-; X86-AVX-NEXT:    flds (%esp)
-; X86-AVX-NEXT:    popl %eax
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
-; X86-AVX-NEXT:    retl
-;
-; X86-NOSSE-LABEL: load_float:
-; X86-NOSSE:       # %bb.0:
-; X86-NOSSE-NEXT:    pushl %eax
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl (%eax), %eax
-; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    flds (%esp)
-; X86-NOSSE-NEXT:    popl %eax
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
-; X86-NOSSE-NEXT:    retl
+; X86-LABEL: load_float:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    flds (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-SSE-LABEL: load_float:
 ; X64-SSE:       # %bb.0:
@@ -445,61 +395,11 @@
 }
 
 define double @load_double(double* %fptr) {
-; X86-SSE1-LABEL: load_double:
-; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    subl $20, %esp
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 24
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    fildll (%eax)
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, (%esp)
-; X86-SSE1-NEXT:    fldl (%esp)
-; X86-SSE1-NEXT:    addl $20, %esp
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
-; X86-SSE1-NEXT:    retl
-;
-; X86-SSE2-LABEL: load_double:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    subl $12, %esp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
-; X86-SSE2-NEXT:    fldl (%esp)
-; X86-SSE2-NEXT:    addl $12, %esp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
-; X86-SSE2-NEXT:    retl
-;
-; X86-AVX-LABEL: load_double:
-; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    subl $12, %esp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
-; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
-; X86-AVX-NEXT:    fldl (%esp)
-; X86-AVX-NEXT:    addl $12, %esp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
-; X86-AVX-NEXT:    retl
-;
-; X86-NOSSE-LABEL: load_double:
-; X86-NOSSE:       # %bb.0:
-; X86-NOSSE-NEXT:    subl $20, %esp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    fildll (%eax)
-; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    fldl (%esp)
-; X86-NOSSE-NEXT:    addl $20, %esp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
-; X86-NOSSE-NEXT:    retl
+; X86-LABEL: load_double:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    fldl (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-SSE-LABEL: load_double:
 ; X64-SSE:       # %bb.0:
@@ -515,85 +415,44 @@
 }
 
 define fp128 @load_fp128(fp128* %fptr) {
-; X86-SSE-LABEL: load_fp128:
-; X86-SSE:       # %bb.0:
-; X86-SSE-NEXT:    pushl %edi
-; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE-NEXT:    pushl %esi
-; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-SSE-NEXT:    subl $20, %esp
-; X86-SSE-NEXT:    .cfi_def_cfa_offset 32
-; X86-SSE-NEXT:    .cfi_offset %esi, -12
-; X86-SSE-NEXT:    .cfi_offset %edi, -8
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT:    subl $8, %esp
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 8
-; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl $0
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    pushl %eax
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT:    calll __sync_val_compare_and_swap_16
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
-; X86-SSE-NEXT:    addl $44, %esp
-; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -44
-; X86-SSE-NEXT:    movl (%esp), %eax
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-SSE-NEXT:    movl %edi, 8(%esi)
-; X86-SSE-NEXT:    movl %edx, 12(%esi)
-; X86-SSE-NEXT:    movl %eax, (%esi)
-; X86-SSE-NEXT:    movl %ecx, 4(%esi)
-; X86-SSE-NEXT:    movl %esi, %eax
-; X86-SSE-NEXT:    addl $20, %esp
-; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-SSE-NEXT:    popl %esi
-; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE-NEXT:    popl %edi
-; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
-; X86-SSE-NEXT:    retl $4
+; X86-SSE1-LABEL: load_fp128:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl %edi
+; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE1-NEXT:    pushl %esi
+; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
+; X86-SSE1-NEXT:    .cfi_offset %esi, -12
+; X86-SSE1-NEXT:    .cfi_offset %edi, -8
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT:    movl (%ecx), %edx
+; X86-SSE1-NEXT:    movl 4(%ecx), %esi
+; X86-SSE1-NEXT:    movl 8(%ecx), %edi
+; X86-SSE1-NEXT:    movl 12(%ecx), %ecx
+; X86-SSE1-NEXT:    movl %ecx, 12(%eax)
+; X86-SSE1-NEXT:    movl %edi, 8(%eax)
+; X86-SSE1-NEXT:    movl %esi, 4(%eax)
+; X86-SSE1-NEXT:    movl %edx, (%eax)
+; X86-SSE1-NEXT:    popl %esi
+; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE1-NEXT:    popl %edi
+; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE1-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: load_fp128:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movaps (%ecx), %xmm0
+; X86-SSE2-NEXT:    movaps %xmm0, (%eax)
+; X86-SSE2-NEXT:    retl $4
 ;
 ; X86-AVX-LABEL: load_fp128:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    subl $56, %esp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 64
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
-; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; X86-AVX-NEXT:    vmovups %ymm0, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT:    movl %eax, (%esp)
-; X86-AVX-NEXT:    vzeroupper
-; X86-AVX-NEXT:    calll __sync_val_compare_and_swap_16
-; X86-AVX-NEXT:    subl $4, %esp
-; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
-; X86-AVX-NEXT:    vmovaps %xmm0, (%esi)
-; X86-AVX-NEXT:    movl %esi, %eax
-; X86-AVX-NEXT:    addl $56, %esp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX-NEXT:    vmovaps (%ecx), %xmm0
+; X86-AVX-NEXT:    vmovaps %xmm0, (%eax)
 ; X86-AVX-NEXT:    retl $4
 ;
 ; X86-NOSSE-LABEL: load_fp128:
@@ -602,49 +461,18 @@
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
-; X86-NOSSE-NEXT:    subl $20, %esp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 32
 ; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
 ; X86-NOSSE-NEXT:    .cfi_offset %edi, -8
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT:    subl $8, %esp
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 8
-; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl $0
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    pushl %eax
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NOSSE-NEXT:    calll __sync_val_compare_and_swap_16
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
-; X86-NOSSE-NEXT:    addl $44, %esp
-; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -44
-; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NOSSE-NEXT:    movl %edi, 8(%esi)
-; X86-NOSSE-NEXT:    movl %edx, 12(%esi)
-; X86-NOSSE-NEXT:    movl %eax, (%esi)
-; X86-NOSSE-NEXT:    movl %ecx, 4(%esi)
-; X86-NOSSE-NEXT:    movl %esi, %eax
-; X86-NOSSE-NEXT:    addl $20, %esp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOSSE-NEXT:    movl (%ecx), %edx
+; X86-NOSSE-NEXT:    movl 4(%ecx), %esi
+; X86-NOSSE-NEXT:    movl 8(%ecx), %edi
+; X86-NOSSE-NEXT:    movl 12(%ecx), %ecx
+; X86-NOSSE-NEXT:    movl %ecx, 12(%eax)
+; X86-NOSSE-NEXT:    movl %edi, 8(%eax)
+; X86-NOSSE-NEXT:    movl %esi, 4(%eax)
+; X86-NOSSE-NEXT:    movl %edx, (%eax)
 ; X86-NOSSE-NEXT:    popl %esi
 ; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NOSSE-NEXT:    popl %edi
@@ -653,34 +481,12 @@
 ;
 ; X64-SSE-LABEL: load_fp128:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    subq $24, %rsp
-; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
-; X64-SSE-NEXT:    xorl %esi, %esi
-; X64-SSE-NEXT:    xorl %edx, %edx
-; X64-SSE-NEXT:    xorl %ecx, %ecx
-; X64-SSE-NEXT:    xorl %r8d, %r8d
-; X64-SSE-NEXT:    callq __sync_val_compare_and_swap_16
-; X64-SSE-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; X64-SSE-NEXT:    movq %rax, (%rsp)
-; X64-SSE-NEXT:    movaps (%rsp), %xmm0
-; X64-SSE-NEXT:    addq $24, %rsp
-; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X64-SSE-NEXT:    movaps (%rdi), %xmm0
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: load_fp128:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    subq $24, %rsp
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
-; X64-AVX-NEXT:    xorl %esi, %esi
-; X64-AVX-NEXT:    xorl %edx, %edx
-; X64-AVX-NEXT:    xorl %ecx, %ecx
-; X64-AVX-NEXT:    xorl %r8d, %r8d
-; X64-AVX-NEXT:    callq __sync_val_compare_and_swap_16
-; X64-AVX-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; X64-AVX-NEXT:    movq %rax, (%rsp)
-; X64-AVX-NEXT:    vmovaps (%rsp), %xmm0
-; X64-AVX-NEXT:    addq $24, %rsp
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0
 ; X64-AVX-NEXT:    retq
   %v = load atomic fp128, fp128* %fptr unordered, align 16
   ret fp128 %v
Index: llvm/test/CodeGen/X86/atomic-unordered.ll
===================================================================
--- llvm/test/CodeGen/X86/atomic-unordered.ll
+++ llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
-; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
-; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
-; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
 
 define i8 @load_i8(i8* %ptr) {
 ; CHECK-LABEL: load_i8:
Index: llvm/test/CodeGen/X86/atomic128.ll
===================================================================
--- llvm/test/CodeGen/X86/atomic128.ll
+++ llvm/test/CodeGen/X86/atomic128.ll
@@ -893,25 +893,25 @@
 ;
 ; CHECK32-LABEL: atomic_store_relaxed:
 ; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    subl $36, %esp
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
-; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
-; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
-; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
-; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
-; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
-; CHECK32-NEXT:    pushl %eax
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
-; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
-; CHECK32-NEXT:    addl $56, %esp
-; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
+; CHECK32-NEXT:    pushl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    pushl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-NEXT:    .cfi_offset %esi, -12
+; CHECK32-NEXT:    .cfi_offset %edi, -8
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK32-NEXT:    movl %esi, 12(%edi)
+; CHECK32-NEXT:    movl %edx, 8(%edi)
+; CHECK32-NEXT:    movl %ecx, 4(%edi)
+; CHECK32-NEXT:    movl %eax, (%edi)
+; CHECK32-NEXT:    popl %esi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-NEXT:    popl %edi
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK32-NEXT:    retl
   store atomic i128 %in, i128* %p unordered, align 16
   ret void
Index: llvm/test/CodeGen/X86/combineIncDecVector-crash.ll
===================================================================
--- llvm/test/CodeGen/X86/combineIncDecVector-crash.ll
+++ llvm/test/CodeGen/X86/combineIncDecVector-crash.ll
@@ -19,11 +19,11 @@
 ; CHECK-NEXT:    callq newarray
 ; CHECK-NEXT:  .Ltmp0:
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    addss (%rax), %xmm0
 ; CHECK-NEXT:    movdqu (%rax), %xmm1
 ; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
 ; CHECK-NEXT:    psubd %xmm2, %xmm1
 ; CHECK-NEXT:    movdqu %xmm1, (%rax)
+; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    movss %xmm0, (%rax)
 bci_0:
   %token418 = call token (i64, i32, i8 * (i64, i32, i32, i32)*, i32,