diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32374,11 +32374,19 @@
 
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
+  using namespace llvm::PatternMatch;
   // If the atomicrmw's result isn't actually used, we can just add a "lock"
   // prefix to a normal instruction for these operations.
   if (AI->use_empty())
     return AtomicExpansionKind::None;
 
+  if (AI->getOperation() == AtomicRMWInst::Xor) {
+    // A ^ SignBit -> A + SignBit. This allows us to use `xadd` which is
+    // preferable to both `cmpxchg` and `btc`.
+    if (match(AI->getOperand(1), m_SignMask()))
+      return AtomicExpansionKind::None;
+  }
+
   // If the atomicrmw's result is used by a single bit AND, we may use
   // bts/btr/btc instruction for these operations.
   // Note: InstCombinePass can cause a de-optimization here. It replaces the
@@ -33368,10 +33376,13 @@
   if (N->hasAnyUseOfValue(0)) {
     // Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
    // select LXADD if LOCK_SUB can't be selected.
-    if (Opc == ISD::ATOMIC_LOAD_SUB) {
+    // Handle (atomic_load_xor p, SignBit) as (atomic_load_add p, SignBit) so we
+    // can use LXADD as opposed to cmpxchg.
+    if (Opc == ISD::ATOMIC_LOAD_SUB ||
+        (Opc == ISD::ATOMIC_LOAD_XOR && isMinSignedConstant(RHS))) {
       RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
-      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
-                           RHS, AN->getMemOperand());
+      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
+                           AN->getMemOperand());
     }
     assert(Opc == ISD::ATOMIC_LOAD_ADD &&
            "Used AtomicRMW ops other than Add should have been expanded!");
diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll
--- a/llvm/test/CodeGen/X86/atomic-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll
@@ -183,19 +183,17 @@
 define i16 @btc15() nounwind {
 ; X86-LABEL: btc15:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    lock btcw $15, v16
-; X86-NEXT:    setb %al
-; X86-NEXT:    shll $15, %eax
+; X86-NEXT:    movw $-32768, %ax # imm = 0x8000
+; X86-NEXT:    lock xaddw %ax, v16
+; X86-NEXT:    andl $32768, %eax # imm = 0x8000
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: btc15:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    lock btcw $15, v16(%rip)
-; X64-NEXT:    setb %al
-; X64-NEXT:    shll $15, %eax
+; X64-NEXT:    movw $-32768, %ax # imm = 0x8000
+; X64-NEXT:    lock xaddw %ax, v16(%rip)
+; X64-NEXT:    andl $32768, %eax # imm = 0x8000
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 entry:
@@ -207,18 +205,16 @@
 define i32 @btc31() nounwind {
 ; X86-LABEL: btc31:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    lock btcl $31, v32
-; X86-NEXT:    setb %al
-; X86-NEXT:    shll $31, %eax
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    lock xaddl %eax, v32
+; X86-NEXT:    andl $-2147483648, %eax # imm = 0x80000000
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: btc31:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    lock btcl $31, v32(%rip)
-; X64-NEXT:    setb %al
-; X64-NEXT:    shll $31, %eax
+; X64-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT:    lock xaddl %eax, v32(%rip)
+; X64-NEXT:    andl $-2147483648, %eax # imm = 0x80000000
 ; X64-NEXT:    retq
 entry:
   %0 = atomicrmw xor ptr @v32, i32 2147483648 monotonic, align 4
@@ -251,10 +247,10 @@
 ;
 ; X64-LABEL: btc63:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    lock btcq $63, v64(%rip)
-; X64-NEXT:    setb %al
-; X64-NEXT:    shlq $63, %rax
+; X64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    lock xaddq %rax, v64(%rip)
+; X64-NEXT:    andq %rcx, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = atomicrmw xor ptr @v64, i64 -9223372036854775808 monotonic, align 8
diff --git a/llvm/test/CodeGen/X86/atomic-xor.ll b/llvm/test/CodeGen/X86/atomic-xor.ll
--- a/llvm/test/CodeGen/X86/atomic-xor.ll
+++ b/llvm/test/CodeGen/X86/atomic-xor.ll
@@ -85,16 +85,8 @@
 ;
 ; X64-LABEL: xor64_signbit_used:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %rax
-; X64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB2_1: # %atomicrmw.start
-; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movq %rax, %rdx
-; X64-NEXT:    xorq %rcx, %rdx
-; X64-NEXT:    lock cmpxchgq %rdx, (%rdi)
-; X64-NEXT:    jne .LBB2_1
-; X64-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-NEXT:    lock xaddq %rax, (%rdi)
 ; X64-NEXT:    retq
   %r = atomicrmw xor ptr %p, i64 9223372036854775808 monotonic
   ret i64 %r
@@ -104,29 +96,14 @@
 ; X86-LABEL: xor32_signbit_used:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl (%ecx), %eax
-; X86-NEXT:    .p2align 4, 0x90
-; X86-NEXT:  .LBB3_1: # %atomicrmw.start
-; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    leal -2147483648(%eax), %edx
-; X86-NEXT:    lock cmpxchgl %edx, (%ecx)
-; X86-NEXT:    jne .LBB3_1
-; X86-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    lock xaddl %eax, (%ecx)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: xor32_signbit_used:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %eax
-; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB3_1: # %atomicrmw.start
-; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    leal -2147483648(%rax), %ecx
-; X64-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; X64-NEXT:    # kill: def $eax killed $eax def $rax
-; X64-NEXT:    jne .LBB3_1
-; X64-NEXT:  # %bb.2: # %atomicrmw.end
-; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT:    lock xaddl %eax, (%rdi)
 ; X64-NEXT:    retq
   %r = atomicrmw xor ptr %p, i32 2147483648 monotonic
   ret i32 %r
@@ -136,34 +113,14 @@
 ; X86-LABEL: xor16_signbit_used:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl (%ecx), %eax
-; X86-NEXT:    .p2align 4, 0x90
-; X86-NEXT:  .LBB4_1: # %atomicrmw.start
-; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    xorl $32768, %edx # imm = 0x8000
-; X86-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-NEXT:    lock cmpxchgw %dx, (%ecx)
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    jne .LBB4_1
-; X86-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    movw $-32768, %ax # imm = 0x8000
+; X86-NEXT:    lock xaddw %ax, (%ecx)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: xor16_signbit_used:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB4_1: # %atomicrmw.start
-; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    xorl $32768, %ecx # imm = 0x8000
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    lock cmpxchgw %cx, (%rdi)
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    jne .LBB4_1
-; X64-NEXT:  # %bb.2: # %atomicrmw.end
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    movw $-32768, %ax # imm = 0x8000
+; X64-NEXT:    lock xaddw %ax, (%rdi)
 ; X64-NEXT:    retq
   %r = atomicrmw xor ptr %p, i16 32768 monotonic
   ret i16 %r
@@ -173,30 +130,14 @@
 ; X86-LABEL: xor8_signbit_used:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl (%ecx), %eax
-; X86-NEXT:    .p2align 4, 0x90
-; X86-NEXT:  .LBB5_1: # %atomicrmw.start
-; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    addb $-128, %dl
-; X86-NEXT:    lock cmpxchgb %dl, (%ecx)
-; X86-NEXT:    jne .LBB5_1
-; X86-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NEXT:    movb $-128, %al
+; X86-NEXT:    lock xaddb %al, (%ecx)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: xor8_signbit_used:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl (%rdi), %eax
-; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB5_1: # %atomicrmw.start
-; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    leal -128(%rax), %ecx
-; X64-NEXT:    # kill: def $al killed $al killed $rax
-; X64-NEXT:    lock cmpxchgb %cl, (%rdi)
-; X64-NEXT:    # kill: def $al killed $al def $rax
-; X64-NEXT:    jne .LBB5_1
-; X64-NEXT:  # %bb.2: # %atomicrmw.end
-; X64-NEXT:    # kill: def $al killed $al killed $rax
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    lock xaddb %al, (%rdi)
 ; X64-NEXT:    retq
   %r = atomicrmw xor ptr %p, i8 128 monotonic
   ret i8 %r
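
Note: the lowering above relies on the two's-complement identity X ^ SignBit == X + SignBit. Flipping the top bit never carries into or borrows from the lower bits, and any carry out of the top bit is discarded, which is why `lock xadd` can stand in for the `cmpxchg` loop (or `btc`) when the xor operand is exactly the sign mask. A minimal standalone C++ check of this identity is sketched below; it is illustrative only, not part of the patch, and the sample values are arbitrary.

// Illustrative check (not part of the patch): for the sign-bit mask,
// x ^ SignBit == x + SignBit, because flipping only the top bit cannot
// affect lower bits and the carry out of the top bit wraps away. This is
// the identity that lets `atomicrmw xor p, SignBit` become `lock xadd`.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t SignBit = 0x80000000u; // i32 sign-bit mask (2147483648)
  const uint32_t Samples[] = {0u, 1u, 0x7fffffffu, 0x80000000u, 0xdeadbeefu};
  for (uint32_t X : Samples)
    assert((X ^ SignBit) == (X + SignBit)); // xor == add for the sign bit
  return 0;
}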