diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -793,6 +793,13 @@
     LBTC,
     LBTR,
+    /// RAO arithmetic instructions.
+    /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
+    AADD,
+    AOR,
+    AXOR,
+    AAND,
+
     // Load, scalar_to_vector, and zero extend.
     VZEXT_LOAD,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31796,6 +31796,47 @@
     return N;
   }
+  // We can lower add/sub/or/xor/and into RAO-INT instructions when the
+  // result is unused.
+  // TODO: RAO-INT instructions are weakly ordered; stronger atomic orderings
+  // could be supported by inserting LFENCE/MFENCE/SFENCE.
+  // TODO: We could manually widen i8/i16 to i32 and use RAO-INT instructions
+  // if the accesses are 32-bit aligned.
+  if (Subtarget.hasRAOINT() &&
+      AN->getSuccessOrdering() == AtomicOrdering::Monotonic &&
+      (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) {
+    if (Opc == ISD::ATOMIC_LOAD_SUB) {
+      RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
+                           AN->getMemOperand());
+    }
+    unsigned NewOpc = 0;
+    switch (N->getOpcode()) {
+    case ISD::ATOMIC_LOAD_ADD:
+      NewOpc = X86ISD::AADD;
+      break;
+    case ISD::ATOMIC_LOAD_OR:
+      NewOpc = X86ISD::AOR;
+      break;
+    case ISD::ATOMIC_LOAD_XOR:
+      NewOpc = X86ISD::AXOR;
+      break;
+    case ISD::ATOMIC_LOAD_AND:
+      NewOpc = X86ISD::AAND;
+      break;
+    default:
+      llvm_unreachable("Unexpected ATOMIC_LOAD_ opcode");
+    }
+
+    MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+    SDValue RAO = DAG.getMemIntrinsicNode(NewOpc, DL, DAG.getVTList(MVT::Other),
+                                          {Chain, LHS, RHS}, VT, MMO);
+
+    // NOTE: The getUNDEF is needed to give something for the unused result 0.
+    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT),
+                       RAO);
+  }
+
   // Specialized lowering for the canonical form of an idempotent atomicrmw.
   // The core idea here is that since the memory location isn't actually
   // changing, all we need is a lowering for the *ordering* impacts of the
@@ -33709,6 +33750,10 @@
   NODE_NAME_CASE(LBTS)
   NODE_NAME_CASE(LBTC)
   NODE_NAME_CASE(LBTR)
+  NODE_NAME_CASE(AADD)
+  NODE_NAME_CASE(AOR)
+  NODE_NAME_CASE(AXOR)
+  NODE_NAME_CASE(AAND)
   NODE_NAME_CASE(VZEXT_MOVL)
   NODE_NAME_CASE(VZEXT_LOAD)
   NODE_NAME_CASE(VEXTRACT_STORE)
diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td
--- a/llvm/lib/Target/X86/X86InstrRAOINT.td
+++ b/llvm/lib/Target/X86/X86InstrRAOINT.td
@@ -13,19 +13,33 @@
 //===----------------------------------------------------------------------===//
 // RAO-INT instructions
+
+def SDTRAOBinaryArith : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def X86rao_add : SDNode<"X86ISD::AADD", SDTRAOBinaryArith,
+                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_or  : SDNode<"X86ISD::AOR", SDTRAOBinaryArith,
+                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_xor : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
+                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86rao_and : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
+                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 multiclass RAOINT_BASE<string OpcodeStr> {
   let Predicates = [HasRAOINT] in
   def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-              !strconcat(OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
-              []>, Sched<[WriteALURMW]>;
+              !strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
+              [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
+              Sched<[WriteALURMW]>;
   let Predicates = [HasRAOINT, In64BitMode] in
   def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-              !strconcat(OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
-              []>, Sched<[WriteALURMW]>, REX_W;
+              !strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
+              [(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
+              Sched<[WriteALURMW]>, REX_W;
 }
-defm AADD : RAOINT_BASE<"aadd">, T8PS;
-defm AAND : RAOINT_BASE<"aand">, T8PD;
-defm AOR  : RAOINT_BASE<"aor" >, T8XD;
-defm AXOR : RAOINT_BASE<"axor">, T8XS;
+defm AADD : RAOINT_BASE<"add">, T8PS;
+defm AAND : RAOINT_BASE<"and">, T8PD;
+defm AOR  : RAOINT_BASE<"or" >, T8XD;
+defm AXOR : RAOINT_BASE<"xor">, T8XS;
diff --git a/llvm/test/CodeGen/X86/atomic-instructions-32.ll b/llvm/test/CodeGen/X86/atomic-instructions-32.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-instructions-32.ll
@@ -0,0 +1,392 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86-NO-RAOINT
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown -mattr=+raoint,+sse2 | FileCheck %s --check-prefixes=X86-RAO-INT
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64-NO-RAOINT
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=X64-RAO-INT
+
+define i32 @atomic_add32(i32* nocapture %p, i32 %val) nounwind ssp {
+; X86-NO-RAOINT-LABEL: atomic_add32:
+; X86-NO-RAOINT:       # %bb.0:
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-RAOINT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock addl %eax, (%ecx)
+; X86-NO-RAOINT-NEXT:    lock
xaddl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_add32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock addl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock addl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock addl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock addl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock xaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_add32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: movl %esi, %eax +; X64-NO-RAOINT-NEXT: lock addl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xaddl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_add32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: movl %esi, %eax +; X64-RAO-INT-NEXT: aaddl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock addl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock addl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock addl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock addl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock xaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw add i32* %p, i32 %val monotonic + %2 = atomicrmw add i32* %p, i32 %val acquire + %3 = atomicrmw add i32* %p, i32 %val release + %4 = atomicrmw add i32* %p, i32 %val acq_rel + %5 = atomicrmw add i32* %p, i32 %val seq_cst + + %6 = atomicrmw add i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_or32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_or32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: pushl %esi +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: movl (%edx), %eax +; X86-NO-RAOINT-NEXT: .p2align 4, 0x90 +; X86-NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start +; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NO-RAOINT-NEXT: movl %eax, %esi +; X86-NO-RAOINT-NEXT: orl %ecx, %esi +; X86-NO-RAOINT-NEXT: lock cmpxchgl %esi, (%edx) +; X86-NO-RAOINT-NEXT: jne .LBB1_1 +; X86-NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; X86-NO-RAOINT-NEXT: popl %esi +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_or32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: pushl %esi +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-RAO-INT-NEXT: aorl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock orl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock orl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock orl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock orl %ecx, (%edx) +; X86-RAO-INT-NEXT: movl (%edx), %eax +; X86-RAO-INT-NEXT: .p2align 4, 0x90 +; X86-RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start +; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-RAO-INT-NEXT: movl %eax, %esi +; X86-RAO-INT-NEXT: orl %ecx, %esi +; X86-RAO-INT-NEXT: lock cmpxchgl %esi, (%edx) +; X86-RAO-INT-NEXT: jne .LBB1_1 +; X86-RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; X86-RAO-INT-NEXT: popl %esi +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_or32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, 
(%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: movl (%rdi), %eax +; X64-NO-RAOINT-NEXT: .p2align 4, 0x90 +; X64-NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start +; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NO-RAOINT-NEXT: movl %eax, %ecx +; X64-NO-RAOINT-NEXT: orl %esi, %ecx +; X64-NO-RAOINT-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NO-RAOINT-NEXT: jne .LBB1_1 +; X64-NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_or32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: aorl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock orl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock orl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock orl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock orl %esi, (%rdi) +; X64-RAO-INT-NEXT: movl (%rdi), %eax +; X64-RAO-INT-NEXT: .p2align 4, 0x90 +; X64-RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start +; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-RAO-INT-NEXT: movl %eax, %ecx +; X64-RAO-INT-NEXT: orl %esi, %ecx +; X64-RAO-INT-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-RAO-INT-NEXT: jne .LBB1_1 +; X64-RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw or i32* %p, i32 %val monotonic + %2 = atomicrmw or i32* %p, i32 %val acquire + %3 = atomicrmw or i32* %p, i32 %val release + %4 = atomicrmw or i32* %p, i32 %val acq_rel + %5 = atomicrmw or i32* %p, i32 %val seq_cst + + %6 = atomicrmw or i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_xor32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_xor32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: pushl %esi +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: movl (%edx), %eax +; X86-NO-RAOINT-NEXT: .p2align 4, 0x90 +; X86-NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start +; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NO-RAOINT-NEXT: movl %eax, %esi +; X86-NO-RAOINT-NEXT: xorl %ecx, %esi +; X86-NO-RAOINT-NEXT: lock cmpxchgl %esi, (%edx) +; X86-NO-RAOINT-NEXT: jne .LBB2_1 +; X86-NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; X86-NO-RAOINT-NEXT: popl %esi +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_xor32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: pushl %esi +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-RAO-INT-NEXT: axorl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock xorl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock xorl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock xorl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock xorl %ecx, (%edx) +; X86-RAO-INT-NEXT: movl (%edx), %eax +; X86-RAO-INT-NEXT: .p2align 4, 0x90 +; X86-RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start +; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-RAO-INT-NEXT: movl %eax, %esi +; X86-RAO-INT-NEXT: xorl %ecx, %esi +; X86-RAO-INT-NEXT: lock cmpxchgl %esi, (%edx) +; X86-RAO-INT-NEXT: jne .LBB2_1 +; X86-RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; X86-RAO-INT-NEXT: popl %esi +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_xor32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: 
lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: movl (%rdi), %eax +; X64-NO-RAOINT-NEXT: .p2align 4, 0x90 +; X64-NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start +; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NO-RAOINT-NEXT: movl %eax, %ecx +; X64-NO-RAOINT-NEXT: xorl %esi, %ecx +; X64-NO-RAOINT-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NO-RAOINT-NEXT: jne .LBB2_1 +; X64-NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_xor32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: axorl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock xorl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock xorl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock xorl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock xorl %esi, (%rdi) +; X64-RAO-INT-NEXT: movl (%rdi), %eax +; X64-RAO-INT-NEXT: .p2align 4, 0x90 +; X64-RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start +; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-RAO-INT-NEXT: movl %eax, %ecx +; X64-RAO-INT-NEXT: xorl %esi, %ecx +; X64-RAO-INT-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-RAO-INT-NEXT: jne .LBB2_1 +; X64-RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw xor i32* %p, i32 %val monotonic + %2 = atomicrmw xor i32* %p, i32 %val acquire + %3 = atomicrmw xor i32* %p, i32 %val release + %4 = atomicrmw xor i32* %p, i32 %val acq_rel + %5 = atomicrmw xor i32* %p, i32 %val seq_cst + + %6 = atomicrmw xor i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_and32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_and32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: pushl %esi +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%edx) +; X86-NO-RAOINT-NEXT: movl (%edx), %eax +; X86-NO-RAOINT-NEXT: .p2align 4, 0x90 +; X86-NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start +; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NO-RAOINT-NEXT: movl %eax, %esi +; X86-NO-RAOINT-NEXT: andl %ecx, %esi +; X86-NO-RAOINT-NEXT: lock cmpxchgl %esi, (%edx) +; X86-NO-RAOINT-NEXT: jne .LBB3_1 +; X86-NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; X86-NO-RAOINT-NEXT: popl %esi +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_and32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: pushl %esi +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-RAO-INT-NEXT: aandl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock andl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock andl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock andl %ecx, (%edx) +; X86-RAO-INT-NEXT: lock andl %ecx, (%edx) +; X86-RAO-INT-NEXT: movl (%edx), %eax +; X86-RAO-INT-NEXT: .p2align 4, 0x90 +; X86-RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start +; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-RAO-INT-NEXT: movl %eax, %esi +; X86-RAO-INT-NEXT: andl %ecx, %esi +; X86-RAO-INT-NEXT: lock cmpxchgl %esi, (%edx) +; X86-RAO-INT-NEXT: jne .LBB3_1 +; X86-RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; X86-RAO-INT-NEXT: popl %esi +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_and32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl 
%esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: movl (%rdi), %eax +; X64-NO-RAOINT-NEXT: .p2align 4, 0x90 +; X64-NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start +; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NO-RAOINT-NEXT: movl %eax, %ecx +; X64-NO-RAOINT-NEXT: andl %esi, %ecx +; X64-NO-RAOINT-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NO-RAOINT-NEXT: jne .LBB3_1 +; X64-NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_and32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: aandl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock andl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock andl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock andl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock andl %esi, (%rdi) +; X64-RAO-INT-NEXT: movl (%rdi), %eax +; X64-RAO-INT-NEXT: .p2align 4, 0x90 +; X64-RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start +; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-RAO-INT-NEXT: movl %eax, %ecx +; X64-RAO-INT-NEXT: andl %esi, %ecx +; X64-RAO-INT-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-RAO-INT-NEXT: jne .LBB3_1 +; X64-RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw and i32* %p, i32 %val monotonic + %2 = atomicrmw and i32* %p, i32 %val acquire + %3 = atomicrmw and i32* %p, i32 %val release + %4 = atomicrmw and i32* %p, i32 %val acq_rel + %5 = atomicrmw and i32* %p, i32 %val seq_cst + + %6 = atomicrmw and i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_sub32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_sub32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: negl %eax +; X86-NO-RAOINT-NEXT: lock xaddl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_sub32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-RAO-INT-NEXT: movl %edx, %eax +; X86-RAO-INT-NEXT: negl %eax +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock subl %edx, (%ecx) +; X86-RAO-INT-NEXT: lock subl %edx, (%ecx) +; X86-RAO-INT-NEXT: lock subl %edx, (%ecx) +; X86-RAO-INT-NEXT: lock subl %edx, (%ecx) +; X86-RAO-INT-NEXT: lock xaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_sub32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: movl %esi, %eax +; X64-NO-RAOINT-NEXT: lock subl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: negl %eax +; X64-NO-RAOINT-NEXT: lock xaddl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_sub32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: movl %esi, %eax +; X64-RAO-INT-NEXT: negl %eax +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: lock subl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock subl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock subl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock subl %esi, (%rdi) +; X64-RAO-INT-NEXT: lock xaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw sub i32* %p, i32 %val monotonic + %2 = atomicrmw 
sub i32* %p, i32 %val acquire + %3 = atomicrmw sub i32* %p, i32 %val release + %4 = atomicrmw sub i32* %p, i32 %val acq_rel + %5 = atomicrmw sub i32* %p, i32 %val seq_cst + + %6 = atomicrmw sub i32* %p, i32 %val seq_cst + ret i32 %6 +} diff --git a/llvm/test/CodeGen/X86/atomic-instructions-64.ll b/llvm/test/CodeGen/X86/atomic-instructions-64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/atomic-instructions-64.ll @@ -0,0 +1,207 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=RAO-INT +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=NO-RAOINT + +define i64 @atomic_add64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_add64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: movq %rsi, %rax +; RAO-INT-NEXT: aaddq %rsi, (%rdi) +; RAO-INT-NEXT: lock addq %rsi, (%rdi) +; RAO-INT-NEXT: lock addq %rsi, (%rdi) +; RAO-INT-NEXT: lock addq %rsi, (%rdi) +; RAO-INT-NEXT: lock addq %rsi, (%rdi) +; RAO-INT-NEXT: lock xaddq %rax, (%rdi) +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_add64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: movq %rsi, %rax +; NO-RAOINT-NEXT: lock addq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock addq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock addq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock addq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock addq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xaddq %rax, (%rdi) +; NO-RAOINT-NEXT: retq + %1 = atomicrmw add i64* %p, i64 %val monotonic + %2 = atomicrmw add i64* %p, i64 %val acquire + %3 = atomicrmw add i64* %p, i64 %val release + %4 = atomicrmw add i64* %p, i64 %val acq_rel + %5 = atomicrmw add i64* %p, i64 %val seq_cst + + %6 = atomicrmw add i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_or64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_or64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: aorq %rsi, (%rdi) +; RAO-INT-NEXT: lock orq %rsi, (%rdi) +; RAO-INT-NEXT: lock orq %rsi, (%rdi) +; RAO-INT-NEXT: lock orq %rsi, (%rdi) +; RAO-INT-NEXT: lock orq %rsi, (%rdi) +; RAO-INT-NEXT: movq (%rdi), %rax +; RAO-INT-NEXT: .p2align 4, 0x90 +; RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start +; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; RAO-INT-NEXT: movq %rax, %rcx +; RAO-INT-NEXT: orq %rsi, %rcx +; RAO-INT-NEXT: lock cmpxchgq %rcx, (%rdi) +; RAO-INT-NEXT: jne .LBB1_1 +; RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_or64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: movq (%rdi), %rax +; NO-RAOINT-NEXT: .p2align 4, 0x90 +; NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start +; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; NO-RAOINT-NEXT: movq %rax, %rcx +; NO-RAOINT-NEXT: orq %rsi, %rcx +; NO-RAOINT-NEXT: lock cmpxchgq %rcx, (%rdi) +; NO-RAOINT-NEXT: jne .LBB1_1 +; NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; NO-RAOINT-NEXT: retq + %1 = atomicrmw or i64* %p, i64 %val monotonic + %2 = atomicrmw or i64* %p, i64 %val acquire + %3 = atomicrmw or i64* %p, i64 %val release + %4 = atomicrmw or i64* %p, i64 %val acq_rel + %5 = atomicrmw or i64* %p, i64 %val seq_cst + + %6 = atomicrmw or i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_xor64(i64* nocapture %p, i64 %val) 
nounwind ssp { +; RAO-INT-LABEL: atomic_xor64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: axorq %rsi, (%rdi) +; RAO-INT-NEXT: lock xorq %rsi, (%rdi) +; RAO-INT-NEXT: lock xorq %rsi, (%rdi) +; RAO-INT-NEXT: lock xorq %rsi, (%rdi) +; RAO-INT-NEXT: lock xorq %rsi, (%rdi) +; RAO-INT-NEXT: movq (%rdi), %rax +; RAO-INT-NEXT: .p2align 4, 0x90 +; RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start +; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; RAO-INT-NEXT: movq %rax, %rcx +; RAO-INT-NEXT: xorq %rsi, %rcx +; RAO-INT-NEXT: lock cmpxchgq %rcx, (%rdi) +; RAO-INT-NEXT: jne .LBB2_1 +; RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_xor64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: movq (%rdi), %rax +; NO-RAOINT-NEXT: .p2align 4, 0x90 +; NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start +; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; NO-RAOINT-NEXT: movq %rax, %rcx +; NO-RAOINT-NEXT: xorq %rsi, %rcx +; NO-RAOINT-NEXT: lock cmpxchgq %rcx, (%rdi) +; NO-RAOINT-NEXT: jne .LBB2_1 +; NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; NO-RAOINT-NEXT: retq + %1 = atomicrmw xor i64* %p, i64 %val monotonic + %2 = atomicrmw xor i64* %p, i64 %val acquire + %3 = atomicrmw xor i64* %p, i64 %val release + %4 = atomicrmw xor i64* %p, i64 %val acq_rel + %5 = atomicrmw xor i64* %p, i64 %val seq_cst + + %6 = atomicrmw xor i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_and64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_and64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: aandq %rsi, (%rdi) +; RAO-INT-NEXT: lock andq %rsi, (%rdi) +; RAO-INT-NEXT: lock andq %rsi, (%rdi) +; RAO-INT-NEXT: lock andq %rsi, (%rdi) +; RAO-INT-NEXT: lock andq %rsi, (%rdi) +; RAO-INT-NEXT: movq (%rdi), %rax +; RAO-INT-NEXT: .p2align 4, 0x90 +; RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start +; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; RAO-INT-NEXT: movq %rax, %rcx +; RAO-INT-NEXT: andq %rsi, %rcx +; RAO-INT-NEXT: lock cmpxchgq %rcx, (%rdi) +; RAO-INT-NEXT: jne .LBB3_1 +; RAO-INT-NEXT: # %bb.2: # %atomicrmw.end +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_and64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: movq (%rdi), %rax +; NO-RAOINT-NEXT: .p2align 4, 0x90 +; NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start +; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; NO-RAOINT-NEXT: movq %rax, %rcx +; NO-RAOINT-NEXT: andq %rsi, %rcx +; NO-RAOINT-NEXT: lock cmpxchgq %rcx, (%rdi) +; NO-RAOINT-NEXT: jne .LBB3_1 +; NO-RAOINT-NEXT: # %bb.2: # %atomicrmw.end +; NO-RAOINT-NEXT: retq + %1 = atomicrmw and i64* %p, i64 %val monotonic + %2 = atomicrmw and i64* %p, i64 %val acquire + %3 = atomicrmw and i64* %p, i64 %val release + %4 = atomicrmw and i64* %p, i64 %val acq_rel + %5 = atomicrmw and i64* %p, i64 %val seq_cst + + %6 = atomicrmw and i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_sub64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_sub64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: movq %rsi, %rax +; RAO-INT-NEXT: negq %rax +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: lock subq %rsi, (%rdi) +; RAO-INT-NEXT: lock subq %rsi, (%rdi) 
+; RAO-INT-NEXT: lock subq %rsi, (%rdi) +; RAO-INT-NEXT: lock subq %rsi, (%rdi) +; RAO-INT-NEXT: lock xaddq %rax, (%rdi) +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_sub64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: movq %rsi, %rax +; NO-RAOINT-NEXT: lock subq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock subq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock subq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock subq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock subq %rsi, (%rdi) +; NO-RAOINT-NEXT: negq %rax +; NO-RAOINT-NEXT: lock xaddq %rax, (%rdi) +; NO-RAOINT-NEXT: retq + %1 = atomicrmw sub i64* %p, i64 %val monotonic + %2 = atomicrmw sub i64* %p, i64 %val acquire + %3 = atomicrmw sub i64* %p, i64 %val release + %4 = atomicrmw sub i64* %p, i64 %val acq_rel + %5 = atomicrmw sub i64* %p, i64 %val seq_cst + + %6 = atomicrmw sub i64* %p, i64 %val seq_cst + ret i64 %6 +}
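
Reviewer note (not part of the patch): a minimal standalone reproducer of the new lowering, mirroring the %1 case in the tests above, i.e. a monotonic atomicrmw whose result is unused. The function name is illustrative:

  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+raoint
  define void @add64_noret(i64* %p, i64 %val) {
    ; Result unused + monotonic ordering + legal type (i32/i64), so this
    ; should select "aaddq %rsi, (%rdi)" rather than "lock addq"/"lock xaddq".
    %unused = atomicrmw add i64* %p, i64 %val monotonic
    ret void
  }

Stronger orderings (acquire/release/acq_rel/seq_cst), a used result, or other types still take the existing LOCK-prefixed/cmpxchg lowering, as the remaining checks above show. atomicrmw sub has no RAO-INT counterpart, so it is rewritten as an add of the negated operand, which is why atomic_sub32/atomic_sub64 emit negl/negq followed by aaddl/aaddq.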