diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -790,6 +790,9 @@
     LOR,
     LXOR,
     LAND,
+    LBTS,
+    LBTC,
+    LBTR,
 
     // Load, scalar_to_vector, and zero extend.
     VZEXT_LOAD,
@@ -1640,6 +1643,8 @@
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicLogRMWInIR(AtomicRMWInst *AI) const;
 
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5671,7 +5671,12 @@
 bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
-  return true;
+  auto *AI = dyn_cast<AtomicRMWInst>(AndI.getOperand(0));
+  if (!AI)
+    return true;
+  AtomicRMWInst::BinOp Op = AI->getOperation();
+  return Op != AtomicRMWInst::Or && Op != AtomicRMWInst::And &&
+         Op != AtomicRMWInst::Xor;
 }
 
 bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
@@ -30134,6 +30139,33 @@
              : AtomicExpansionKind::None;
 }
 
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLogRMWInIR(AtomicRMWInst *AI) const {
+  // If the atomicrmw's result isn't actually used, we can just add a "lock"
+  // prefix to a normal instruction for these operations.
+  if (AI->use_empty())
+    return AtomicExpansionKind::None;
+
+  // If the atomicrmw's result is only used by a single-bit AND, we may be
+  // able to use a bts/btr/btc instruction for these operations.
+  auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+  Instruction *I = AI->user_back();
+  if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+      AI->getParent() != I->getParent())
+    return AtomicExpansionKind::CmpXChg;
+  // The following instruction must be an AND with a single-bit constant.
+  auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+  unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
+  if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+    return AtomicExpansionKind::CmpXChg;
+
+  if (AI->getOperation() == AtomicRMWInst::And)
+    return ~C1->getValue() == C2->getValue() ? AtomicExpansionKind::None
+                                             : AtomicExpansionKind::CmpXChg;
+
+  return C1 == C2 ? AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
+}
+
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -30158,10 +30190,7 @@
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
-    // If the atomicrmw's result isn't actually used, we can just add a "lock"
-    // prefix to a normal instruction for these operations.
-    return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
-                            : AtomicExpansionKind::None;
+    return shouldExpandAtomicLogRMWInIR(AI);
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -30859,6 +30888,40 @@
     return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
                          AN->getMemOperand());
   }
+  if (Opc == ISD::ATOMIC_LOAD_OR || Opc == ISD::ATOMIC_LOAD_XOR ||
+      Opc == ISD::ATOMIC_LOAD_AND) {
+    ConstantSDNode *C1 = cast<ConstantSDNode>(RHS);
+    ConstantSDNode *C2 = nullptr;
+    for (auto UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) {
+      // Skip uses of the chain value. Result 0 of the node is the load value.
+      if (UI.getUse().getResNo() != 0)
+        continue;
+      if (C2 || UI->getOpcode() != ISD::AND)
+        report_fatal_error("Atomic result must be used by one AND");
+      C2 = cast<ConstantSDNode>(UI->getOperand(1));
+      assert(isPowerOf2_64(C2->getZExtValue()) && "Must be power of 2 value");
+    }
+    if (Opc == ISD::ATOMIC_LOAD_AND) {
+      assert(~C1->getAPIntValue() == C2->getAPIntValue() &&
+             "Cannot lower to BTR");
+      Opc = X86ISD::LBTR;
+    } else {
+      assert(C1 == C2 && "Cannot lower to BTS/BTC");
+      Opc = Opc == ISD::ATOMIC_LOAD_OR ? X86ISD::LBTS : X86ISD::LBTC;
+    }
+
+    unsigned Imm = countTrailingZeros(C2->getZExtValue());
+    MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+    SDValue Res = DAG.getMemIntrinsicNode(
+        Opc, DL, DAG.getVTList(VT, MVT::Other),
+        {Chain, LHS, DAG.getConstant(Imm, DL, VT)}, VT, MMO);
+    Chain = Res.getValue(1);
+    Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+    if (Imm)
+      Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+                        DAG.getShiftAmountConstant(Imm, VT, DL));
+    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Res, Chain);
+  }
   assert(Opc == ISD::ATOMIC_LOAD_ADD &&
          "Used AtomicRMW ops other than Add should have been expanded!");
   return N;
@@ -32642,6 +32705,9 @@
   NODE_NAME_CASE(LOR)
   NODE_NAME_CASE(LXOR)
   NODE_NAME_CASE(LAND)
+  NODE_NAME_CASE(LBTS)
+  NODE_NAME_CASE(LBTC)
+  NODE_NAME_CASE(LBTR)
   NODE_NAME_CASE(VZEXT_MOVL)
   NODE_NAME_CASE(VZEXT_LOAD)
   NODE_NAME_CASE(VEXTRACT_STORE)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -839,6 +839,28 @@
   def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
 }
 
+multiclass ATOMIC_LOG<Format Form, string mnemonic, SDPatternOperator frag> {
+  let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+      SchedRW = [WriteBitTestSetRegRMW] in {
+    def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i16imm:$src2),
+                  !strconcat(mnemonic, "{w}\t{$src2, $src1|$src1, $src2}"),
+                  [(set EFLAGS, (frag addr:$src1, (i16 imm:$src2)))]>,
+                  OpSize16, TB, LOCK;
+    def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i32imm:$src2),
+                  !strconcat(mnemonic, "{l}\t{$src2, $src1|$src1, $src2}"),
+                  [(set EFLAGS, (frag addr:$src1, (i32 imm:$src2)))]>,
+                  OpSize32, TB, LOCK;
+    def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i64imm:$src2),
+                   !strconcat(mnemonic, "{q}\t{$src2, $src1|$src1, $src2}"),
+                   [(set EFLAGS, (frag addr:$src1, (i64 imm:$src2)))]>,
+                   TB, LOCK;
+  }
+}
+
+defm LOCK_BTS : ATOMIC_LOG<MRM5m, "bts", X86lock_bts>;
+defm LOCK_BTR : ATOMIC_LOG<MRM6m, "btr", X86lock_btr>;
+defm LOCK_BTC : ATOMIC_LOG<MRM7m, "btc", X86lock_btc>;
+
 // Atomic compare and swap.
 multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                           string mnemonic, SDPatternOperator frag> {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -282,6 +282,15 @@
 def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
                          [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
                           SDNPMemOperand]>;
+def X86lock_bts : SDNode<"X86ISD::LBTS", SDTLockBinaryArithWithFlags,
+                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+                          SDNPMemOperand]>;
+def X86lock_btc : SDNode<"X86ISD::LBTC", SDTLockBinaryArithWithFlags,
+                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+                          SDNPMemOperand]>;
+def X86lock_btr : SDNode<"X86ISD::LBTR", SDTLockBinaryArithWithFlags,
+                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+                          SDNPMemOperand]>;
 
 def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
 def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>;
diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll
@@ -0,0 +1,570 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+@v16 = dso_local global i16 0, align 2
+@v32 = dso_local global i32 0, align 4
+@v64 = dso_local global i64 0, align 8
+
+define i16 @bts1() nounwind {
+; X86-LABEL: bts1:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $0, v16
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts1:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $0, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+entry:
+  %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2
+  %and = and i16 %0, 1
+  ret i16 %and
+}
+
+define i16 @bts2() nounwind {
+; X86-LABEL: bts2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $1, v16
+; X86-NEXT: setb %al
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts2:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $1, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: addl %eax, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+entry:
+  %0 = atomicrmw or i16* @v16, i16 2 monotonic, align 2
+  %and = and i16 %0, 2
+  ret i16 %and
+}
+
+define i16 @bts15() nounwind {
+; X86-LABEL: bts15:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $15, v16
+; X86-NEXT: setb %al
+; X86-NEXT: shll $15, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts15:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $15, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $15, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+entry:
+  %0 = atomicrmw or i16* @v16, i16 32768 monotonic, align 2
+  %and = and i16 %0, 32768
+  ret i16 %and
+}
+
+define i32 @bts31() nounwind {
+; X86-LABEL: bts31:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl $31, v32
+; X86-NEXT: setb %al
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts31:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; 
X64-NEXT: lock btsl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i32* @v32, i32 2147483648 monotonic, align 4 + %and = and i32 %0, 2147483648 + ret i32 %and +} + +define i64 @bts63() nounwind { +; X86-LABEL: bts63: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-NEXT: movl v64+4, %edx +; X86-NEXT: movl v64, %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB4_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: lock cmpxchg8b v64 +; X86-NEXT: jne .LBB4_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %esi, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: bts63: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i64* @v64, i64 -9223372036854775808 monotonic, align 8 + %and = and i64 %0, -9223372036854775808 + ret i64 %and +} + +define i16 @btc1() nounwind { +; X86-LABEL: btc1: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $0, v16 +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btc1: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $0, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i16* @v16, i16 1 monotonic, align 2 + %and = and i16 %0, 1 + ret i16 %and +} + +define i16 @btc2() nounwind { +; X86-LABEL: btc2: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btc2: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i16* @v16, i16 2 monotonic, align 2 + %and = and i16 %0, 2 + ret i16 %and +} + +define i16 @btc15() nounwind { +; X86-LABEL: btc15: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btc15: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i16* @v16, i16 32768 monotonic, align 2 + %and = and i16 %0, 32768 + ret i16 %and +} + +define i32 @btc31() nounwind { +; X86-LABEL: btc31: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax +; X86-NEXT: retl +; +; X64-LABEL: btc31: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i32* @v32, i32 2147483648 monotonic, align 4 + %and = and i32 %0, 2147483648 + ret i32 %and +} + +define i64 @btc63() nounwind { +; X86-LABEL: btc63: 
+; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-NEXT: movl v64+4, %edx +; X86-NEXT: movl v64, %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB9_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: lock cmpxchg8b v64 +; X86-NEXT: jne .LBB9_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %esi, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: btc63: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i64* @v64, i64 -9223372036854775808 monotonic, align 8 + %and = and i64 %0, -9223372036854775808 + ret i64 %and +} + +define i16 @btr1() nounwind { +; X86-LABEL: btr1: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $0, v16 +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btr1: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $0, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i16* @v16, i16 -2 monotonic, align 2 + %and = and i16 %0, 1 + ret i16 %and +} + +define i16 @btr2() nounwind { +; X86-LABEL: btr2: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btr2: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i16* @v16, i16 -3 monotonic, align 2 + %and = and i16 %0, 2 + ret i16 %and +} + +define i16 @btr15() nounwind { +; X86-LABEL: btr15: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btr15: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i16* @v16, i16 32767 monotonic, align 2 + %and = and i16 %0, 32768 + ret i16 %and +} + +define i32 @btr31() nounwind { +; X86-LABEL: btr31: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax +; X86-NEXT: retl +; +; X64-LABEL: btr31: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i32* @v32, i32 2147483647 monotonic, align 4 + %and = and i32 %0, 2147483648 + ret i32 %and +} + +define i64 @btr63() nounwind { +; X86-LABEL: btr63: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF +; X86-NEXT: movl $-1, %edi +; X86-NEXT: movl v64+4, %edx +; X86-NEXT: movl v64, %eax +; X86-NEXT: .p2align 4, 0x90 +; 
X86-NEXT: .LBB14_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andl %esi, %ecx +; X86-NEXT: lock cmpxchg8b v64 +; X86-NEXT: jne .LBB14_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: addl $1, %edi +; X86-NEXT: adcl $0, %esi +; X86-NEXT: andl %edi, %eax +; X86-NEXT: andl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: btr63: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i64* @v64, i64 9223372036854775807 monotonic, align 8 + %and = and i64 %0, -9223372036854775808 + ret i64 %and +} + +define i16 @multi_use1() nounwind { +; X86-LABEL: multi_use1: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl v16, %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB15_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl $1, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, v16 +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB15_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1, %ecx +; X86-NEXT: xorl $2, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: multi_use1: +; X64: # %bb.0: # %entry +; X64-NEXT: movzwl v16(%rip), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB15_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl $1, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, v16(%rip) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB15_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: xorl $2, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2 + %1 = and i16 %0, 1 + %2 = xor i16 %0, 2 + %3 = or i16 %1, %2 + ret i16 %3 +} + +define i16 @multi_use2() nounwind { +; X86-LABEL: multi_use2: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $0, v16 +; X86-NEXT: setb %al +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: multi_use2: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $0, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2 + %1 = and i16 %0, 1 + %2 = shl i16 %1, 1 + %3 = or i16 %1, %2 + ret i16 %3 +} + +define i16 @use_in_diff_bb() nounwind { +; X86-LABEL: use_in_diff_bb: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl v16, %esi +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB17_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: orl $1, %ecx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: lock cmpxchgw %cx, v16 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: jne .LBB17_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %al, %al +; X86-NEXT: jne .LBB17_4 
+; X86-NEXT: # %bb.3: +; X86-NEXT: calll foo@PLT +; X86-NEXT: .LBB17_4: +; X86-NEXT: andl $1, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: use_in_diff_bb: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: movzwl v16(%rip), %ebx +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB17_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %ebx, %ecx +; X64-NEXT: orl $1, %ecx +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: lock cmpxchgw %cx, v16(%rip) +; X64-NEXT: movl %eax, %ebx +; X64-NEXT: jne .LBB17_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %al, %al +; X64-NEXT: jne .LBB17_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: callq foo@PLT +; X64-NEXT: .LBB17_4: +; X64-NEXT: andl $1, %ebx +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: popq %rbx +; X64-NEXT: retq +entry: + %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2 + br i1 undef, label %1, label %2 +1: + call void @foo() + br label %3 +2: + br label %3 +3: + %and = and i16 %0, 1 + ret i16 %and +} + +declare void @foo() + +define void @no_and_cmp0_fold() nounwind { +; X86-LABEL: no_and_cmp0_fold: +; X86: # %bb.0: # %entry +; X86-NEXT: lock btsl $3, v32 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %al, %al +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.2: # %if.end +; X86-NEXT: retl +; X86-NEXT: .LBB18_1: # %if.then +; +; X64-LABEL: no_and_cmp0_fold: +; X64: # %bb.0: # %entry +; X64-NEXT: lock btsl $3, v32(%rip) +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %al, %al +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.2: # %if.end +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %if.then +entry: + %0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4 + %and = and i32 %0, 8 + %tobool = icmp ne i32 %and, 0 + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry + unreachable + +if.end: ; preds = %entry + %or.cond8 = select i1 %tobool, i1 undef, i1 false + ret void +}
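
For context, a minimal C++ sketch (not part of the patch) of the kind of source that produces the IR pattern targeted here, assuming Clang lowers the relaxed fetch_or to a monotonic `atomicrmw or` whose only use is a single-bit `and`, as in the tests above; the global and function names are illustrative:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint32_t> flags; // illustrative global, analogous to @v32 in the tests

    // With this patch, the fetch_or plus single-bit mask below is expected to
    // lower to "lock btsl $3, flags(%rip)" followed by "setb" rather than a
    // cmpxchg loop, because shouldExpandAtomicLogRMWInIR returns
    // AtomicExpansionKind::None when the or/and constants select the same bit.
    bool test_and_set_bit3() {
      return (flags.fetch_or(8u, std::memory_order_relaxed) & 8u) != 0;
    }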