diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -790,6 +790,9 @@
     LOR,
     LXOR,
     LAND,
+    LBTS,
+    LBTC,
+    LBTR,
 
     // Load, scalar_to_vector, and zero extend.
     VZEXT_LOAD,
@@ -1640,6 +1643,8 @@
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicLogRMWInIR(AtomicRMWInst *AI) const;
 
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5671,7 +5671,12 @@
 bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
-  return true;
+  auto *AI = dyn_cast<AtomicRMWInst>(AndI.getOperand(0));
+  if (!AI)
+    return true;
+  AtomicRMWInst::BinOp Op = AI->getOperation();
+  return Op != AtomicRMWInst::Or && Op != AtomicRMWInst::And &&
+         Op != AtomicRMWInst::Xor;
 }
 
 bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
@@ -30134,6 +30139,33 @@
              : AtomicExpansionKind::None;
 }
 
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLogRMWInIR(AtomicRMWInst *AI) const {
+  // If the atomicrmw's result isn't actually used, we can just add a "lock"
+  // prefix to a normal instruction for these operations.
+  if (AI->use_empty())
+    return AtomicExpansionKind::None;
+
+  // If the atomicrmw's result is only used by a single-bit AND, we may be
+  // able to use a bts/btr/btc instruction for these operations.
+  auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+  Instruction *I = AI->user_back();
+  if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+      AI->getParent() != I->getParent())
+    return AtomicExpansionKind::CmpXChg;
+  // The following instruction must be an AND with a single-bit constant.
+  auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+  unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
+  if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+    return AtomicExpansionKind::CmpXChg;
+
+  if (AI->getOperation() == AtomicRMWInst::And)
+    return ~C1->getValue() == C2->getValue() ? AtomicExpansionKind::None
+                                             : AtomicExpansionKind::CmpXChg;
+
+  return C1 == C2 ? AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
+}
+
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -30158,10 +30190,7 @@
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
-    // If the atomicrmw's result isn't actually used, we can just add a "lock"
-    // prefix to a normal instruction for these operations.
-    return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
-                            : AtomicExpansionKind::None;
+    return shouldExpandAtomicLogRMWInIR(AI);
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -30859,6 +30888,40 @@
     return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
                          AN->getMemOperand());
   }
+  if (Opc == ISD::ATOMIC_LOAD_OR || Opc == ISD::ATOMIC_LOAD_XOR ||
+      Opc == ISD::ATOMIC_LOAD_AND) {
+    ConstantSDNode *C1 = cast<ConstantSDNode>(RHS);
+    ConstantSDNode *C2 = nullptr;
+    for (auto UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) {
+      // Skip uses of the chain value. Result 0 of the node is the load value.
+      if (UI.getUse().getResNo() != 0)
+        continue;
+      if (C2 || UI->getOpcode() != ISD::AND)
+        report_fatal_error("Atomic result must be used by one AND");
+      C2 = cast<ConstantSDNode>(UI->getOperand(1));
+      assert(isPowerOf2_64(C2->getZExtValue()) && "Must be power of 2 value");
+    }
+    if (Opc == ISD::ATOMIC_LOAD_AND) {
+      assert(~C1->getAPIntValue() == C2->getAPIntValue() &&
+             "Cannot lower to BTR");
+      Opc = X86ISD::LBTR;
+    } else {
+      assert(C1 == C2 && "Cannot lower to BTS/BTC");
+      Opc = Opc == ISD::ATOMIC_LOAD_OR ? X86ISD::LBTS : X86ISD::LBTC;
+    }
+
+    unsigned Imm = countTrailingZeros(C2->getZExtValue());
+    MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+    SDValue Res = DAG.getMemIntrinsicNode(
+        Opc, DL, DAG.getVTList(VT, MVT::Other),
+        {Chain, LHS, DAG.getConstant(Imm, DL, VT)}, VT, MMO);
+    Chain = Res.getValue(1);
+    Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+    if (Imm)
+      Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+                        DAG.getShiftAmountConstant(Imm, VT, DL));
+    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Res, Chain);
+  }
   assert(Opc == ISD::ATOMIC_LOAD_ADD &&
          "Used AtomicRMW ops other than Add should have been expanded!");
   return N;
@@ -32642,6 +32705,9 @@
   NODE_NAME_CASE(LOR)
   NODE_NAME_CASE(LXOR)
   NODE_NAME_CASE(LAND)
+  NODE_NAME_CASE(LBTS)
+  NODE_NAME_CASE(LBTC)
+  NODE_NAME_CASE(LBTR)
   NODE_NAME_CASE(VZEXT_MOVL)
   NODE_NAME_CASE(VZEXT_LOAD)
   NODE_NAME_CASE(VEXTRACT_STORE)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -839,6 +839,28 @@
   def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
 }
 
+multiclass ATOMIC_LOG<Format Form, string mnemonic, SDPatternOperator frag> {
+  let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+      SchedRW = [WriteBitTestSetRegRMW] in {
+    def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i16imm:$src2),
+                  !strconcat(mnemonic, "{w}\t{$src2, $src1|$src1, $src2}"),
+                  [(set EFLAGS, (frag addr:$src1, (i16 imm:$src2)))]>,
+                  OpSize16, TB, LOCK;
+    def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i32imm:$src2),
+                  !strconcat(mnemonic, "{l}\t{$src2, $src1|$src1, $src2}"),
+                  [(set EFLAGS, (frag addr:$src1, (i32 imm:$src2)))]>,
+                  OpSize32, TB, LOCK;
+    def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i64imm:$src2),
+                   !strconcat(mnemonic, "{q}\t{$src2, $src1|$src1, $src2}"),
+                   [(set EFLAGS, (frag addr:$src1, (i64 imm:$src2)))]>,
+                   TB, LOCK;
+  }
+}
+
+defm LOCK_BTS : ATOMIC_LOG<MRM5m, "bts", X86lock_bts>;
+defm LOCK_BTR : ATOMIC_LOG<MRM6m, "btr", X86lock_btr>;
+defm LOCK_BTC : ATOMIC_LOG<MRM7m, "btc", X86lock_btc>;
+
 // Atomic compare and swap.
 multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                           string mnemonic, SDPatternOperator frag> {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -282,6 +282,15 @@
 def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
                          [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
                           SDNPMemOperand]>;
+def X86lock_bts : SDNode<"X86ISD::LBTS", SDTLockBinaryArithWithFlags,
+                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+                          SDNPMemOperand]>;
+def X86lock_btc : SDNode<"X86ISD::LBTC", SDTLockBinaryArithWithFlags,
+                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+                          SDNPMemOperand]>;
+def X86lock_btr : SDNode<"X86ISD::LBTR", SDTLockBinaryArithWithFlags,
+                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+                          SDNPMemOperand]>;
 
 def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
 def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>;
diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll
@@ -0,0 +1,570 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+@v16 = dso_local global i16 0, align 2
+@v32 = dso_local global i32 0, align 4
+@v64 = dso_local global i64 0, align 8
+
+define i16 @bts1() nounwind {
+; X86-LABEL: bts1:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $0, v16
+; X86-NEXT: setb %al
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts1:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $0, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+entry:
+  %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2
+  %and = and i16 %0, 1
+  ret i16 %and
+}
+
+define i16 @bts2() nounwind {
+; X86-LABEL: bts2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $1, v16
+; X86-NEXT: setb %al
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts2:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $1, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: addl %eax, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+entry:
+  %0 = atomicrmw or i16* @v16, i16 2 monotonic, align 2
+  %and = and i16 %0, 2
+  ret i16 %and
+}
+
+define i16 @bts15() nounwind {
+; X86-LABEL: bts15:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsw $15, v16
+; X86-NEXT: setb %al
+; X86-NEXT: shll $15, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts15:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: lock btsw $15, v16(%rip)
+; X64-NEXT: setb %al
+; X64-NEXT: shll $15, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+entry:
+  %0 = atomicrmw or i16* @v16, i16 32768 monotonic, align 2
+  %and = and i16 %0, 32768
+  ret i16 %and
+}
+
+define i32 @bts31() nounwind {
+; X86-LABEL: bts31:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: lock btsl $31, v32
+; X86-NEXT: setb %al
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: bts31:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; 
X64-NEXT: lock btsl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i32* @v32, i32 2147483648 monotonic, align 4 + %and = and i32 %0, 2147483648 + ret i32 %and +} + +define i64 @bts63() nounwind { +; X86-LABEL: bts63: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-NEXT: movl v64+4, %edx +; X86-NEXT: movl v64, %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB4_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: lock cmpxchg8b v64 +; X86-NEXT: jne .LBB4_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %esi, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: bts63: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i64* @v64, i64 -9223372036854775808 monotonic, align 8 + %and = and i64 %0, -9223372036854775808 + ret i64 %and +} + +define i16 @btc1() nounwind { +; X86-LABEL: btc1: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $0, v16 +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btc1: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $0, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i16* @v16, i16 1 monotonic, align 2 + %and = and i16 %0, 1 + ret i16 %and +} + +define i16 @btc2() nounwind { +; X86-LABEL: btc2: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btc2: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i16* @v16, i16 2 monotonic, align 2 + %and = and i16 %0, 2 + ret i16 %and +} + +define i16 @btc15() nounwind { +; X86-LABEL: btc15: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btc15: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i16* @v16, i16 32768 monotonic, align 2 + %and = and i16 %0, 32768 + ret i16 %and +} + +define i32 @btc31() nounwind { +; X86-LABEL: btc31: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax +; X86-NEXT: retl +; +; X64-LABEL: btc31: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i32* @v32, i32 2147483648 monotonic, align 4 + %and = and i32 %0, 2147483648 + ret i32 %and +} + +define i64 @btc63() nounwind { +; X86-LABEL: btc63: 
+; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-NEXT: movl v64+4, %edx +; X86-NEXT: movl v64, %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB9_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: lock cmpxchg8b v64 +; X86-NEXT: jne .LBB9_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %esi, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: btc63: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor i64* @v64, i64 -9223372036854775808 monotonic, align 8 + %and = and i64 %0, -9223372036854775808 + ret i64 %and +} + +define i16 @btr1() nounwind { +; X86-LABEL: btr1: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $0, v16 +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btr1: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $0, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i16* @v16, i16 -2 monotonic, align 2 + %and = and i16 %0, 1 + ret i16 %and +} + +define i16 @btr2() nounwind { +; X86-LABEL: btr2: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btr2: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i16* @v16, i16 -3 monotonic, align 2 + %and = and i16 %0, 2 + ret i16 %and +} + +define i16 @btr15() nounwind { +; X86-LABEL: btr15: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: btr15: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i16* @v16, i16 32767 monotonic, align 2 + %and = and i16 %0, 32768 + ret i16 %and +} + +define i32 @btr31() nounwind { +; X86-LABEL: btr31: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax +; X86-NEXT: retl +; +; X64-LABEL: btr31: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i32* @v32, i32 2147483647 monotonic, align 4 + %and = and i32 %0, 2147483648 + ret i32 %and +} + +define i64 @btr63() nounwind { +; X86-LABEL: btr63: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF +; X86-NEXT: movl $-1, %edi +; X86-NEXT: movl v64+4, %edx +; X86-NEXT: movl v64, %eax +; X86-NEXT: .p2align 4, 0x90 +; 
X86-NEXT: .LBB14_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andl %esi, %ecx +; X86-NEXT: lock cmpxchg8b v64 +; X86-NEXT: jne .LBB14_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: addl $1, %edi +; X86-NEXT: adcl $0, %esi +; X86-NEXT: andl %edi, %eax +; X86-NEXT: andl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: btr63: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax +; X64-NEXT: retq +entry: + %0 = atomicrmw and i64* @v64, i64 9223372036854775807 monotonic, align 8 + %and = and i64 %0, -9223372036854775808 + ret i64 %and +} + +define i16 @multi_use1() nounwind { +; X86-LABEL: multi_use1: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl v16, %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB15_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl $1, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, v16 +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB15_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1, %ecx +; X86-NEXT: xorl $2, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: multi_use1: +; X64: # %bb.0: # %entry +; X64-NEXT: movzwl v16(%rip), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB15_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl $1, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, v16(%rip) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB15_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: xorl $2, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2 + %1 = and i16 %0, 1 + %2 = xor i16 %0, 2 + %3 = or i16 %1, %2 + ret i16 %3 +} + +define i16 @multi_use2() nounwind { +; X86-LABEL: multi_use2: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $0, v16 +; X86-NEXT: setb %al +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: multi_use2: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $0, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2 + %1 = and i16 %0, 1 + %2 = shl i16 %1, 1 + %3 = or i16 %1, %2 + ret i16 %3 +} + +define i16 @use_in_diff_bb() nounwind { +; X86-LABEL: use_in_diff_bb: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl v16, %esi +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB17_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: orl $1, %ecx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: lock cmpxchgw %cx, v16 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: jne .LBB17_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %al, %al +; X86-NEXT: jne .LBB17_4 
+; X86-NEXT: # %bb.3: +; X86-NEXT: calll foo@PLT +; X86-NEXT: .LBB17_4: +; X86-NEXT: andl $1, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: use_in_diff_bb: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: movzwl v16(%rip), %ebx +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB17_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %ebx, %ecx +; X64-NEXT: orl $1, %ecx +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: lock cmpxchgw %cx, v16(%rip) +; X64-NEXT: movl %eax, %ebx +; X64-NEXT: jne .LBB17_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %al, %al +; X64-NEXT: jne .LBB17_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: callq foo@PLT +; X64-NEXT: .LBB17_4: +; X64-NEXT: andl $1, %ebx +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: popq %rbx +; X64-NEXT: retq +entry: + %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2 + br i1 undef, label %1, label %2 +1: + call void @foo() + br label %3 +2: + br label %3 +3: + %and = and i16 %0, 1 + ret i16 %and +} + +declare void @foo() + +define void @no_and_cmp0_fold() nounwind { +; X86-LABEL: no_and_cmp0_fold: +; X86: # %bb.0: # %entry +; X86-NEXT: lock btsl $3, v32 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %al, %al +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.2: # %if.end +; X86-NEXT: retl +; X86-NEXT: .LBB18_1: # %if.then +; +; X64-LABEL: no_and_cmp0_fold: +; X64: # %bb.0: # %entry +; X64-NEXT: lock btsl $3, v32(%rip) +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %al, %al +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.2: # %if.end +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %if.then +entry: + %0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4 + %and = and i32 %0, 8 + %tobool = icmp ne i32 %and, 0 + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry + unreachable + +if.end: ; preds = %entry + %or.cond8 = select i1 %tobool, i1 undef, i1 false + ret void +}
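
For context, a minimal C++ sketch (not part of the patch) of the kind of source that produces the IR pattern targeted here, assuming Clang lowers the relaxed fetch_or to a monotonic `atomicrmw or` whose only use is a single-bit `and`, as in the tests above; the global and function names are illustrative:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint32_t> flags; // illustrative global, analogous to @v32 in the tests

    // With this patch, the fetch_or plus single-bit mask below is expected to
    // lower to "lock btsl $3, flags(%rip)" followed by "setb" rather than a
    // cmpxchg loop, because shouldExpandAtomicLogRMWInIR returns
    // AtomicExpansionKind::None when the or/and constants select the same bit.
    bool test_and_set_bit3() {
      return (flags.fetch_or(8u, std::memory_order_relaxed) & 8u) != 0;
    }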