diff --git a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
--- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -549,7 +549,7 @@
 
   // If PUSHrmm is not slow on this target, try to fold the source of the
   // push into the instruction.
-  bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
+  bool SlowPUSHrmm = STI->slowTwoMemOps();
 
   // Check that this is legal to fold. Right now, we're extremely
   // conservative about that.
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -1,6 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SSE2
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-ATOM
 
 ; On x86, an atomic rmw operation that does not modify the value in memory
 ; (such as atomic add 0) can be replaced by an mfence followed by a mov.
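
The -mattr=-sse2 RUN lines are the interesting ones: mfence is an SSE2 instruction, so without SSE2 the mfence+mov rewrite described above is unavailable and llc has to keep a real atomic operation (a lock xadd for add8, lock cmpxchg loops for the wider cases below). A minimal C++ sketch of that decision, using hypothetical names rather than the actual X86ISelLowering API:

  // Sketch only: HasSSE2 stands in for a subtarget query such as hasSSE2().
  // The check prefixes below map onto the two outcomes: X86-SSE2 gets the
  // fenced load, while the -sse2 runs (X86-SLM, X86-ATOM) keep the RMW.
  enum class IdempotentRMWLowering {
    FencedLoad, // mfence followed by an ordinary load
    KeepRMW     // e.g. lock xadd, or a lock cmpxchg loop
  };

  IdempotentRMWLowering chooseLowering(bool HasSSE2) {
    return HasSSE2 ? IdempotentRMWLowering::FencedLoad
                   : IdempotentRMWLowering::KeepRMW;
  }
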
@@ -14,12 +18,30 @@
 ; X64-NEXT: movb (%rdi), %al
 ; X64-NEXT: retq
 ;
-; X86-LABEL: add8:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movb (%eax), %al
-; X86-NEXT: retl
+; X86-SSE2-LABEL: add8:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movb (%eax), %al
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: add8:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: xorl %eax, %eax
+; X86-SLM-NEXT: lock xaddb %al, (%ecx)
+; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: add8:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: xorl %eax, %eax
+; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
+; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw add i8* %p, i8 0 monotonic
   ret i8 %1
 }
@@ -31,12 +53,36 @@
 ; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: or16:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzwl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: or16:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movzwl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
+; X86-SLM-NEXT: jne .LBB1_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: or16:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movzwl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB1_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw or i16* %p, i16 0 acquire
   ret i16 %1
 }
@@ -48,12 +94,36 @@
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: retq
 ;
-; X86-LABEL: xor32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: xor32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: xor32:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: jne .LBB2_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: xor32:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB2_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw xor i32* %p, i32 0 release
   ret i32 %1
 }
@@ -105,44 +175,124 @@
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or128:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: .cfi_def_cfa_register %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_offset %esi, -16
-; X86-NEXT: .cfi_offset %edi, -12
-; X86-NEXT: movl 8(%ebp), %esi
-; X86-NEXT: movl %esp, %eax
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 12(%ebp)
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __sync_fetch_and_or_16
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa %esp, 4
-; X86-NEXT: retl $4
+; X86-SSE2-LABEL: or128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: .cfi_offset %esi, -16
+; X86-SSE2-NEXT: .cfi_offset %edi, -12
+; X86-SSE2-NEXT: movl 8(%ebp), %esi
+; X86-SSE2-NEXT: movl %esp, %eax
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
+; X86-SSE2-NEXT: addl $20, %esp
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SSE2-NEXT: movl %edi, 8(%esi)
+; X86-SSE2-NEXT: movl %edx, 12(%esi)
+; X86-SSE2-NEXT: movl %eax, (%esi)
+; X86-SSE2-NEXT: movl %ecx, 4(%esi)
+; X86-SSE2-NEXT: movl %esi, %eax
+; X86-SSE2-NEXT: leal -8(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl $4
+;
+; X86-SLM-LABEL: or128:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: pushl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_offset 8
+; X86-SLM-NEXT: .cfi_offset %ebp, -8
+; X86-SLM-NEXT: movl %esp, %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
+; X86-SLM-NEXT: pushl %edi
+; X86-SLM-NEXT: pushl %esi
+; X86-SLM-NEXT: andl $-8, %esp
+; X86-SLM-NEXT: subl $16, %esp
+; X86-SLM-NEXT: .cfi_offset %esi, -16
+; X86-SLM-NEXT: .cfi_offset %edi, -12
+; X86-SLM-NEXT: movl 8(%ebp), %esi
+; X86-SLM-NEXT: movl 12(%ebp), %eax
+; X86-SLM-NEXT: movl %esp, %ecx
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl %eax
+; X86-SLM-NEXT: pushl %ecx
+; X86-SLM-NEXT: calll __sync_fetch_and_or_16
+; X86-SLM-NEXT: addl $20, %esp
+; X86-SLM-NEXT: movl (%esp), %eax
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLM-NEXT: movl %edi, 8(%esi)
+; X86-SLM-NEXT: movl %edx, 12(%esi)
+; X86-SLM-NEXT: movl %eax, (%esi)
+; X86-SLM-NEXT: movl %ecx, 4(%esi)
+; X86-SLM-NEXT: movl %esi, %eax
+; X86-SLM-NEXT: leal -8(%ebp), %esp
+; X86-SLM-NEXT: popl %esi
+; X86-SLM-NEXT: popl %edi
+; X86-SLM-NEXT: popl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
+; X86-SLM-NEXT: retl $4
+;
+; X86-ATOM-LABEL: or128:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: pushl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
+; X86-ATOM-NEXT: .cfi_offset %ebp, -8
+; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: pushl %edi
+; X86-ATOM-NEXT: pushl %esi
+; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: .cfi_offset %esi, -16
+; X86-ATOM-NEXT: .cfi_offset %edi, -12
+; X86-ATOM-NEXT: movl 8(%ebp), %esi
+; X86-ATOM-NEXT: movl 12(%ebp), %eax
+; X86-ATOM-NEXT: movl %esp, %ecx
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: pushl %ecx
+; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl (%esp), %ecx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-ATOM-NEXT: movl %eax, 8(%esi)
+; X86-ATOM-NEXT: movl %edi, 12(%esi)
+; X86-ATOM-NEXT: movl %ecx, (%esi)
+; X86-ATOM-NEXT: movl %esi, %eax
+; X86-ATOM-NEXT: movl %edx, 4(%esi)
+; X86-ATOM-NEXT: leal -8(%ebp), %esp
+; X86-ATOM-NEXT: popl %esi
+; X86-ATOM-NEXT: popl %edi
+; X86-ATOM-NEXT: popl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
+; X86-ATOM-NEXT: retl $4
   %1 = atomicrmw or i128* %p, i128 0 monotonic
   ret i128 %1
 }
@@ -155,49 +305,137 @@
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: retq
 ;
-; X86-LABEL: and32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: and32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: and32:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: jne .LBB5_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: and32:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB5_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw and i32* %p, i32 -1 acq_rel
   ret i32 %1
 }
 
 define void @or32_nouse_monotonic(i32* %p) {
-; CHECK-LABEL: or32_nouse_monotonic:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_monotonic:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_monotonic:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_monotonic:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 monotonic
   ret void
 }
 
 define void @or32_nouse_acquire(i32* %p) {
-; CHECK-LABEL: or32_nouse_acquire:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_acquire:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_acquire:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_acquire:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 acquire
   ret void
 }
 
 define void @or32_nouse_release(i32* %p) {
-; CHECK-LABEL: or32_nouse_release:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_release:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_release:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_release:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 release
   ret void
 }
 
 define void @or32_nouse_acq_rel(i32* %p) {
-; CHECK-LABEL: or32_nouse_acq_rel:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_acq_rel:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_acq_rel:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_acq_rel:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 acq_rel
   ret void
 }
@@ -208,10 +446,21 @@
 ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or32_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or32_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 seq_cst
   ret void
 }
@@ -264,28 +513,76 @@
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or128_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: .cfi_def_cfa_register %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl %esp, %eax
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 8(%ebp)
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __sync_fetch_and_or_16
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa %esp, 4
-; X86-NEXT: retl
+; X86-SSE2-LABEL: or128_nouse_seq_cst:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl %esp, %eax
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
+; X86-SSE2-NEXT: addl $20, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: or128_nouse_seq_cst:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: pushl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_offset 8
+; X86-SLM-NEXT: .cfi_offset %ebp, -8
+; X86-SLM-NEXT: movl %esp, %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
+; X86-SLM-NEXT: andl $-8, %esp
+; X86-SLM-NEXT: subl $16, %esp
+; X86-SLM-NEXT: movl 8(%ebp), %eax
+; X86-SLM-NEXT: movl %esp, %ecx
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl %eax
+; X86-SLM-NEXT: pushl %ecx
+; X86-SLM-NEXT: calll __sync_fetch_and_or_16
+; X86-SLM-NEXT: addl $20, %esp
+; X86-SLM-NEXT: movl %ebp, %esp
+; X86-SLM-NEXT: popl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: or128_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: pushl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
+; X86-ATOM-NEXT: .cfi_offset %ebp, -8
+; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl 8(%ebp), %eax
+; X86-ATOM-NEXT: movl %esp, %ecx
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: pushl %ecx
+; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl %ebp, %esp
+; X86-ATOM-NEXT: popl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
+; X86-ATOM-NEXT: retl
   atomicrmw or i128* %p, i128 0 seq_cst
   ret void
 }
@@ -297,10 +594,21 @@
 ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or16_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or16_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or16_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i16* %p, i16 0 seq_cst
   ret void
 }
@@ -311,10 +619,21 @@
 ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or8_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or8_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or8_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i8* %p, i8 0 seq_cst
   ret void
 }
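
The change in X86CallFrameOptimization.cpp at the top is a single predicate swap: instead of hard-coding the Atom and Silvermont CPU families, the "is PUSHrmm slow" test now keys off the SlowTwoMemOps subtarget feature, and the RUN lines above exercise the CPUs that feature is meant to cover (-mcpu=slm, goldmont, knl, atom). A standalone sketch of the before/after, assuming only the X86Subtarget accessors visible in the hunk:

  #include "X86Subtarget.h"

  // Illustrative helper mirroring the one-line change above; the pass itself
  // computes this inline as SlowPUSHrmm.
  static bool isPUSHrmmSlow(const llvm::X86Subtarget *STI) {
    // Before: return STI->isAtom() || STI->isSLM();
    // After: any target flagged as slow for instructions with two memory
    // operands, which a push-from-memory (PUSHrmm) is.
    return STI->slowTwoMemOps();
  }
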