diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll @@ -0,0 +1,1731 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i64 @atomic_shl1_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl2_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl2_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_neq_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_neq_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB2_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: incb %cl +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_small_mask_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: andb $31, %cl +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB3_1 +; CHECK-NEXT: # 
%bb.2: # %atomicrmw.end +; CHECK-NEXT: andl %edx, %eax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask0_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB4_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask1_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB5_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask01_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB6_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_blsi_xor_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_blsi_xor_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: negq %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rdx +; CHECK-NEXT: xorq %rcx, %rdx +; CHECK-NEXT: lock cmpxchgq %rdx, (%rdi) +; CHECK-NEXT: jne .LBB7_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: retq 
+entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + ret i64 %and3 +} + +define i64 @atomic_shl1_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB8_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB8_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl2_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB9_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB9_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: sete %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + %conv = zext i1 %tobool.not to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_neq_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB10_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB10_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: incb %cl +; CHECK-NEXT: movzbl %cl, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: btq %rdx, %rax +; CHECK-NEXT: setae %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %add + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_small_mask_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: andl $31, %ecx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB11_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB11_1 +; 
CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btl %ecx, %eax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask0_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB12_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB12_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask1_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB13_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB13_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask01_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB14_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB14_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_xor_64_gpr_valz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_blsi_xor_64_gpr_valz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: andq %rsi, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB15_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner 
Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB15_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: sete %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + %conv = zext i1 %tobool.not to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB16_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB16_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl2_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB17_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB17_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: setne %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool = icmp ne i64 %and, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_neq_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB18_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: incb %cl +; CHECK-NEXT: movzbl %cl, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: btq %rdx, %rax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %1 = lshr i64 %0, %add + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_small_mask_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: 
andl $31, %ecx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB19_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB19_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btl %ecx, %eax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask0_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB20_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB20_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask1_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB21_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB21_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask01_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: andl $63, %ecx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB22_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB22_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_xor_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: 
atomic_blsi_xor_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: andq %rsi, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB23_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq %rdx, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB23_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: setne %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool = icmp ne i64 %and3, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq $-2, %rsi +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: rolq %cl, %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB24_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB24_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl2_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl2_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB25_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rcx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB25_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_neq_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_neq_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB26_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB26_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: incb %cl +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + 
%and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_small_mask_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: andb $31, %cl +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq $-2, %rsi +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: rolq %cl, %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB27_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB27_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andl %edx, %eax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_mask0_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask0_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB28_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB28_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask1_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask1_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB29_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB29_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask01_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask01_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq $-2, %rsi +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: rolq %cl, %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB30_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) 
+; CHECK-NEXT: jne .LBB30_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_blsi_and_64_gpr_val(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_blsi_and_64_gpr_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: negq %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: notq %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB31_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB31_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %not = xor i64 %and, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and3 = and i64 %0, %and + ret i64 %and3 +} + +define i64 @atomic_shl1_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB32_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB32_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl2_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB33_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rcx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB33_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: setne %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool = icmp ne i64 %and, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_neq_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB34_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, 
%rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB34_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: incb %cl +; CHECK-NEXT: movzbl %cl, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: btq %rdx, %rax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %1 = lshr i64 %0, %add + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_small_mask_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: andl $31, %ecx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB35_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB35_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btl %ecx, %eax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask0_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB36_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB36_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask1_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB37_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB37_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %rem = and i64 %c, 63 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask01_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl 
%ecx, %edx +; CHECK-NEXT: andl $63, %edx +; CHECK-NEXT: movq $-2, %rsi +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: rolq %cl, %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB38_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB38_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: btq %rdx, %rax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_and_64_gpr_valnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_blsi_and_64_gpr_valnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: andq %rsi, %rdx +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: notq %rcx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB39_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rcx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB39_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: setne %cl +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %not = xor i64 %and, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and3 = and i64 %0, %and + %tobool = icmp ne i64 %and3, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq $-2, %rsi +; CHECK-NEXT: rolq %cl, %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB40_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %r8 +; CHECK-NEXT: andq %rsi, %r8 +; CHECK-NEXT: lock cmpxchgq %r8, (%rdi) +; CHECK-NEXT: jne .LBB40_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: je .LBB40_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB40_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl2_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl2_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: notq %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; 
CHECK-NEXT: .LBB41_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %r8 +; CHECK-NEXT: andq %rsi, %r8 +; CHECK-NEXT: lock cmpxchgq %r8, (%rdi) +; CHECK-NEXT: jne .LBB41_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: je .LBB41_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB41_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_neq_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB42_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB42_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: leal 1(%rcx), %edx +; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: btq %rdx, %rax +; CHECK-NEXT: jae .LBB42_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB42_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: andl $31, %ecx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq $-2, %rsi +; CHECK-NEXT: rolq %cl, %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB43_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %r8 +; CHECK-NEXT: andq %rsi, %r8 +; CHECK-NEXT: lock cmpxchgq %r8, (%rdi) +; CHECK-NEXT: jne .LBB43_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: testl %edx, %eax +; CHECK-NEXT: je .LBB43_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB43_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label 
%if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask0_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB44_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB44_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: jae .LBB44_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB44_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask1_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq $-2, %rdx +; CHECK-NEXT: rolq %cl, %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB45_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: andq %rdx, %rsi +; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi) +; CHECK-NEXT: jne .LBB45_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: btq %rcx, %rax +; CHECK-NEXT: jae .LBB45_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB45_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_shl1_mask01_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: movq $-2, %rsi +; CHECK-NEXT: rolq %cl, %rsi +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB46_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %r8 +; CHECK-NEXT: andq %rsi, %r8 +; CHECK-NEXT: lock cmpxchgq %r8, (%rdi) 
+; CHECK-NEXT: jne .LBB46_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: je .LBB46_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB46_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { +; CHECK-LABEL: atomic_blsi_and_64_gpr_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: negq %rcx +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: notq %rdx +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB47_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %r8 +; CHECK-NEXT: andq %rdx, %r8 +; CHECK-NEXT: lock cmpxchgq %r8, (%rdi) +; CHECK-NEXT: jne .LBB47_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: testq %rcx, %rax +; CHECK-NEXT: je .LBB47_3 +; CHECK-NEXT: # %bb.4: # %if.then +; CHECK-NEXT: movq (%rdi,%rsi,8), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB47_3: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %not = xor i64 %and, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_xor_64_const_br(ptr %v) nounwind { +; CHECK-LABEL: atomic_shl1_xor_64_const_br: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: lock btcq $4, (%rdi) +; CHECK-NEXT: setb %al +; CHECK-NEXT: shlq $4, %rax +; CHECK-NEXT: je .LBB48_1 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB48_1: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_xor_64_const_br(ptr %v) nounwind { +; CHECK-LABEL: atomic_shl1_neq_xor_64_const_br: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB49_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq $16, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB49_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: 
movl $123, %ecx +; CHECK-NEXT: testb $32, %al +; CHECK-NEXT: je .LBB49_4 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rcx +; CHECK-NEXT: .LBB49_4: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_xor_64_const_brz(ptr %v) nounwind { +; CHECK-LABEL: atomic_shl1_xor_64_const_brz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: lock btcq $4, (%rdi) +; CHECK-NEXT: setb %al +; CHECK-NEXT: shlq $4, %rax +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: je .LBB50_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB50_1: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rax +; CHECK-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_xor_64_const_brz(ptr %v) nounwind { +; CHECK-LABEL: atomic_shl1_neq_xor_64_const_brz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB51_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq $16, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB51_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: movl $123, %ecx +; CHECK-NEXT: testb $32, %al +; CHECK-NEXT: jne .LBB51_4 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rcx +; CHECK-NEXT: .LBB51_4: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_xor_64_const_brnz(ptr %v) nounwind { +; CHECK-LABEL: atomic_shl1_xor_64_const_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: lock btcq $4, (%rdi) +; CHECK-NEXT: setb %al +; CHECK-NEXT: shlq $4, %rax +; CHECK-NEXT: je .LBB52_1 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB52_1: +; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, 
%if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_xor_64_const_brnz(ptr %v) nounwind { +; CHECK-LABEL: atomic_shl1_neq_xor_64_const_brnz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB53_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: xorq $16, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB53_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: movl $123, %ecx +; CHECK-NEXT: testb $32, %al +; CHECK-NEXT: je .LBB53_4 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rcx +; CHECK-NEXT: .LBB53_4: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll @@ -0,0 +1,7389 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64 + +define zeroext i8 @atomic_shl1_or_8_gpr_val(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_or_8_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzbl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB0_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orb %dl, %cl +; X86-NEXT: lock cmpxchgb %cl, (%esi) +; X86-NEXT: jne .LBB0_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andb %dl, %al +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_8_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB0_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orb %dl, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB0_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andb %dl, %al +; X64-NEXT: retq +entry: + %conv = zext i8 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i8 + %0 = atomicrmw or ptr %v, i8 %conv1 monotonic, align 1 + %conv5 = and i8 %0, %conv1 + ret i8 %conv5 +} + +define zeroext i8 @atomic_shl1_mask0_or_8_gpr_val(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_or_8_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andb $7, %cl +; X86-NEXT: movb $1, %ah +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movb (%esi), %al +; 
X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB1_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orb %ah, %cl +; X86-NEXT: lock cmpxchgb %cl, (%esi) +; X86-NEXT: jne .LBB1_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $1, %ebx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: andb %bl, %al +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_or_8_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $7, %cl +; X64-NEXT: movb $1, %dl +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB1_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orb %dl, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB1_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $1, %edx +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: andb %dl, %al +; X64-NEXT: retq +entry: + %0 = and i8 %c, 7 + %shl = shl nuw i8 1, %0 + %1 = atomicrmw or ptr %v, i8 %shl monotonic, align 1 + %conv3 = zext i8 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %2 = trunc i32 %shl4 to i8 + %conv5 = and i8 %1, %2 + ret i8 %conv5 +} + +define zeroext i8 @atomic_shl1_mask01_or_8_gpr_val(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_or_8_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $7, %cl +; X86-NEXT: movb $1, %ah +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movb (%edx), %al +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB2_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orb %ah, %cl +; X86-NEXT: lock cmpxchgb %cl, (%edx) +; X86-NEXT: jne .LBB2_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andb %ah, %al +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_or_8_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $7, %cl +; X64-NEXT: movb $1, %dl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB2_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orb %dl, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB2_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andb %dl, %al +; X64-NEXT: retq +entry: + %0 = and i8 %c, 7 + %shl = shl nuw i8 1, %0 + %1 = atomicrmw or ptr %v, i8 %shl monotonic, align 1 + %conv7 = and i8 %1, %shl + ret i8 %conv7 +} + +define zeroext i8 @atomic_shl1_xor_8_gpr_valz(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_xor_8_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzbl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB3_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorb %dl, %cl +; X86-NEXT: lock cmpxchgb %cl, (%esi) +; X86-NEXT: jne .LBB3_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: testl %eax, %edx +; X86-NEXT: sete %al +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_8_gpr_valz: +; X64: # %bb.0: # 
%entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB3_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorb %dl, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB3_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: testl %eax, %edx +; X64-NEXT: sete %al +; X64-NEXT: retq +entry: + %conv = zext i8 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i8 + %0 = atomicrmw xor ptr %v, i8 %conv1 monotonic, align 1 + %conv2 = zext i8 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i8 + ret i8 %conv5 +} + +define zeroext i8 @atomic_shl1_mask0_xor_8_gpr_valz(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_xor_8_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andb $7, %cl +; X86-NEXT: movb $1, %ah +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movb (%esi), %al +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB4_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorb %ah, %cl +; X86-NEXT: lock cmpxchgb %cl, (%esi) +; X86-NEXT: jne .LBB4_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movzbl %dl, %ecx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %al +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_xor_8_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $7, %cl +; X64-NEXT: movb $1, %dl +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB4_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorb %dl, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB4_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: retq +entry: + %0 = and i8 %c, 7 + %shl = shl nuw i8 1, %0 + %1 = atomicrmw xor ptr %v, i8 %shl monotonic, align 1 + %conv2 = zext i8 %1 to i32 + %conv3 = zext i8 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i8 + ret i8 %conv5 +} + +define zeroext i8 @atomic_shl1_mask01_xor_8_gpr_valz(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_xor_8_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $7, %cl +; X86-NEXT: movl $1, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: movzbl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB5_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorb %bl, %cl +; X86-NEXT: lock cmpxchgb %cl, (%edx) +; X86-NEXT: jne .LBB5_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: testl %eax, %ebx +; X86-NEXT: sete %al +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_xor_8_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; 
X64-NEXT: andb $7, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB5_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorb %dl, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB5_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: testl %eax, %edx +; X64-NEXT: sete %al +; X64-NEXT: retq +entry: + %0 = and i8 %c, 7 + %sh_prom = zext i8 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i8 + %1 = atomicrmw xor ptr %v, i8 %conv1 monotonic, align 1 + %conv2 = zext i8 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv7 = zext i1 %tobool.not to i8 + ret i8 %conv7 +} + +define zeroext i8 @atomic_shl1_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_and_8_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: movb %bl, %ah +; X86-NEXT: notb %ah +; X86-NEXT: movb (%edx), %al +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB6_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movb %al, %ch +; X86-NEXT: andb %ah, %ch +; X86-NEXT: lock cmpxchgb %ch, (%edx) +; X86-NEXT: jne .LBB6_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: testl %eax, %ebx +; X86-NEXT: je .LBB6_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: movzbl (%edx,%eax), %eax +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; X86-NEXT: .LBB6_3: +; X86-NEXT: movb $123, %al +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_8_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: notb %sil +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB6_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %r8d +; X64-NEXT: andb %sil, %r8b +; X64-NEXT: lock cmpxchgb %r8b, (%rdi) +; X64-NEXT: jne .LBB6_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: testl %eax, %edx +; X64-NEXT: je .LBB6_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: movzbl (%rdi,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB6_3: +; X64-NEXT: movb $123, %al +; X64-NEXT: retq +entry: + %conv = zext i8 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i8 + %conv1 = xor i8 %0, -1 + %1 = atomicrmw and ptr %v, i8 %conv1 monotonic, align 1 + %conv2 = zext i8 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i8 %c to i64 + %arrayidx = getelementptr inbounds i8, ptr %v, i64 %idxprom + %2 = load i8, ptr %arrayidx, align 1 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i8 [ %2, %if.then ], [ 123, %entry ] + ret i8 %retval.0 +} + +define zeroext i8 @atomic_shl1_mask0_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_and_8_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; 
X86-NEXT: movb $-2, %ah +; X86-NEXT: rolb %cl, %ah +; X86-NEXT: movb (%edx), %al +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB7_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movb %al, %ch +; X86-NEXT: andb %ah, %ch +; X86-NEXT: lock cmpxchgb %ch, (%edx) +; X86-NEXT: jne .LBB7_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzbl %al, %esi +; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: btl %eax, %esi +; X86-NEXT: jae .LBB7_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzbl (%edx,%eax), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB7_3: +; X86-NEXT: movb $123, %al +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_and_8_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movb $-2, %dl +; X64-NEXT: rolb %cl, %dl +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB7_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andb %dl, %sil +; X64-NEXT: lock cmpxchgb %sil, (%rdi) +; X64-NEXT: jne .LBB7_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzbl %al, %edx +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: btl %eax, %edx +; X64-NEXT: jae .LBB7_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzbl (%rdi,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB7_3: +; X64-NEXT: movb $123, %al +; X64-NEXT: retq +entry: + %0 = and i8 %c, 7 + %shl = shl nuw i8 1, %0 + %not = xor i8 %shl, -1 + %1 = atomicrmw and ptr %v, i8 %not monotonic, align 1 + %conv2 = zext i8 %1 to i32 + %conv3 = zext i8 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i8 %c to i64 + %arrayidx = getelementptr inbounds i8, ptr %v, i64 %conv + %2 = load i8, ptr %arrayidx, align 1 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i8 [ %2, %if.then ], [ 123, %entry ] + ret i8 %retval.0 +} + +define zeroext i8 @atomic_shl1_mask01_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_and_8_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb %ah, %cl +; X86-NEXT: andb $7, %cl +; X86-NEXT: movl $1, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: notb %cl +; X86-NEXT: movb (%edx), %al +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB8_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movb %al, %ch +; X86-NEXT: andb %cl, %ch +; X86-NEXT: lock cmpxchgb %ch, (%edx) +; X86-NEXT: jne .LBB8_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzbl %al, %ecx +; X86-NEXT: testl %ecx, %ebx +; X86-NEXT: je .LBB8_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzbl %ah, %eax +; X86-NEXT: movzbl (%edx,%eax), %eax +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; X86-NEXT: .LBB8_3: +; X86-NEXT: movb $123, %al +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_and_8_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $7, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %ecx +; X64-NEXT: notb %cl +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB8_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %r8d +; X64-NEXT: andb %cl, %r8b +; 
X64-NEXT: lock cmpxchgb %r8b, (%rdi) +; X64-NEXT: jne .LBB8_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: testl %eax, %edx +; X64-NEXT: je .LBB8_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movzbl (%rdi,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB8_3: +; X64-NEXT: movb $123, %al +; X64-NEXT: retq +entry: + %0 = and i8 %c, 7 + %sh_prom = zext i8 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i8 + %conv1 = xor i8 %1, -1 + %2 = atomicrmw and ptr %v, i8 %conv1 monotonic, align 1 + %conv2 = zext i8 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i8 %c to i64 + %arrayidx = getelementptr inbounds i8, ptr %v, i64 %conv + %3 = load i8, ptr %arrayidx, align 1 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i8 [ %3, %if.then ], [ 123, %entry ] + ret i8 %retval.0 +} + +define zeroext i8 @atomic_shl1_and_8_gpr_val(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_8_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB9_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andb $-17, %dl +; X86-NEXT: lock cmpxchgb %dl, (%ecx) +; X86-NEXT: jne .LBB9_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andb $16, %al +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_8_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB9_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andb $-17, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB9_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andb $16, %al +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i8 -17 monotonic, align 1 + %1 = and i8 %0, 16 + ret i8 %1 +} + +define zeroext i8 @atomic_shl1_or_8_gpr_valnz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_or_8_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB10_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: orb $16, %dl +; X86-NEXT: lock cmpxchgb %dl, (%ecx) +; X86-NEXT: jne .LBB10_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: shrb $4, %al +; X86-NEXT: andb $1, %al +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_8_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB10_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orb $16, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB10_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $1, %al +; X64-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i8 16 monotonic, align 1 + %1 = lshr i8 %0, 4 + %.lobit = and i8 %1, 1 + ret i8 %.lobit +} + +define zeroext i8 @atomic_shl1_xor_8_gpr_brz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_xor_8_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB11_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; 
X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorb $16, %cl +; X86-NEXT: lock cmpxchgb %cl, (%edx) +; X86-NEXT: jne .LBB11_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movb $123, %cl +; X86-NEXT: testb $16, %al +; X86-NEXT: jne .LBB11_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzbl 4(%edx), %ecx +; X86-NEXT: .LBB11_4: # %return +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_8_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB11_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorb $16, %cl +; X64-NEXT: lock cmpxchgb %cl, (%rdi) +; X64-NEXT: jne .LBB11_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movb $123, %cl +; X64-NEXT: testb $16, %al +; X64-NEXT: jne .LBB11_4 +; X64-NEXT: # %bb.3: # %if.then +; X64-NEXT: movzbl 4(%rdi), %ecx +; X64-NEXT: .LBB11_4: # %return +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i8 16 monotonic, align 1 + %1 = and i8 %0, 16 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i8, ptr %v, i64 4 + %2 = load i8, ptr %arrayidx, align 1 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i8 [ %2, %if.then ], [ 123, %entry ] + ret i8 %retval.0 +} + +define zeroext i16 @atomic_shl1_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_xor_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB12_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB12_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB12_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB12_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv5 = and i16 %0, %conv1 + ret i16 %conv5 +} + +define zeroext i16 @atomic_shl1_small_mask_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_xor_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $7, %cl +; X86-NEXT: movl $1, %esi +; 
X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: movzwl %si, %ecx +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB13_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl %ecx, %esi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %si, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB13_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_xor_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $7, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: movzwl %dx, %ecx +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB13_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: xorl %ecx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB13_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 7 + %shl = shl nuw nsw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %and = and i16 %1, %shl + ret i16 %and +} + +define zeroext i16 @atomic_shl1_mask0_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_xor_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB14_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB14_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $1, %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shll %cl, %esi +; X86-NEXT: andl %esi, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_xor_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB14_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB14_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $1, %edx +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 
1, %conv3 + %2 = trunc i32 %shl4 to i16 + %conv5 = and i16 %1, %2 + ret i16 %conv5 +} + +define zeroext i16 @atomic_shl1_mask1_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_xor_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB15_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB15_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $cx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_xor_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB15_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB15_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %1 = and i16 %c, 15 + %shl4 = shl nuw i16 1, %1 + %and = and i16 %0, %shl4 + ret i16 %and +} + +define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_xor_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB16_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB16_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_xor_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB16_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; 
X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB16_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv7 = and i16 %1, %shl + ret i16 %conv7 +} + +define zeroext i16 @atomic_blsi_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_blsi_xor_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %eax, %ecx +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB17_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl %ecx, %esi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %si, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB17_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_xor_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB17_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: xorl %ecx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB17_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %sub = sub i16 0, %c + %and = and i16 %sub, %c + %0 = atomicrmw xor ptr %v, i16 %and monotonic, align 2 + %conv9 = and i16 %0, %and + ret i16 %conv9 +} + +define zeroext i16 @atomic_shl1_xor_16_gpr_valz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_xor_16_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB18_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB18_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %ecx, %edx +; X86-NEXT: sete %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_16_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB18_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: 
def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB18_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: sete %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define zeroext i16 @atomic_shl1_small_mask_xor_16_gpr_valz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_xor_16_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: movzwl %si, %esi +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB19_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB19_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: setae %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_xor_16_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $7, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: movzwl %dx, %edx +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB19_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB19_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: setae %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 7 + %shl = shl nuw nsw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv5 = zext i16 %1 to i32 + %conv6 = zext i16 %0 to i32 + %shl7 = shl nuw nsw i32 1, %conv6 + %and = and i32 %shl7, %conv5 + %tobool.not = icmp eq i32 %and, 0 + %conv8 = zext i1 %tobool.not to i16 + ret i16 %conv8 +} + +define zeroext i16 @atomic_shl1_mask0_xor_16_gpr_valz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_xor_16_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB20_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %edi, %ecx +; X86-NEXT: # kill: def $ax 
killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB20_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %edx, %ecx +; X86-NEXT: setae %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_xor_16_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB20_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB20_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %esi, %ecx +; X64-NEXT: setae %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define zeroext i16 @atomic_shl1_mask1_xor_16_gpr_valz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_xor_16_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB21_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB21_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: setae %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_xor_16_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB21_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB21_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: setae %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = 
zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_valz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_xor_16_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB22_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB22_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %ecx, %esi +; X86-NEXT: sete %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_xor_16_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB22_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB22_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: sete %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv7 = zext i1 %tobool.not to i16 + ret i16 %conv7 +} + +define zeroext i16 @atomic_blsi_xor_16_gpr_valz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_blsi_xor_16_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %eax, %ecx +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB23_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl %ecx, %esi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %si, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB23_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edx, %ecx +; X86-NEXT: sete %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_xor_16_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB23_1: # 
%atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: xorl %ecx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB23_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %edx, %ecx +; X64-NEXT: sete %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + %conv9 = zext i1 %tobool.not to i16 + ret i16 %conv9 +} + +define zeroext i16 @atomic_shl1_xor_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_xor_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB24_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB24_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %ecx, %edx +; X86-NEXT: setne %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB24_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB24_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: setne %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define zeroext i16 @atomic_shl1_small_mask_xor_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_xor_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: movzwl %si, %esi +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB25_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; 
X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB25_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_xor_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $7, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: movzwl %dx, %edx +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB25_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB25_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 7 + %shl = shl nuw nsw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv5 = zext i16 %1 to i32 + %conv6 = zext i16 %0 to i32 + %shl7 = shl nuw nsw i32 1, %conv6 + %and = and i32 %shl7, %conv5 + %tobool = icmp ne i32 %and, 0 + %conv9 = zext i1 %tobool to i16 + ret i16 %conv9 +} + +define zeroext i16 @atomic_shl1_mask0_xor_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_xor_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB26_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB26_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %edx, %ecx +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_xor_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB26_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB26_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %esi, %ecx +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 
to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define zeroext i16 @atomic_shl1_mask1_xor_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_xor_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB27_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB27_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_xor_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB27_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB27_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_xor_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB28_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB28_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %ecx, %esi +; X86-NEXT: setne %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_xor_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; 
X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB28_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB28_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: setne %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv8 = zext i1 %tobool to i16 + ret i16 %conv8 +} + +define zeroext i16 @atomic_blsi_xor_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_blsi_xor_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %eax, %ecx +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB29_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl %ecx, %esi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %si, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB29_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edx, %ecx +; X86-NEXT: setne %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_xor_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB29_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: xorl %ecx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB29_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %edx, %ecx +; X64-NEXT: setne %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool = icmp ne i32 %and8, 0 + %conv10 = zext i1 %tobool to i16 + ret i16 %conv10 +} + +define zeroext i16 @atomic_shl1_xor_16_gpr_brz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_xor_16_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB30_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop 
Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB30_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edi +; X86-NEXT: movw $123, %ax +; X86-NEXT: testl %edi, %esi +; X86-NEXT: jne .LBB30_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: .LBB30_4: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_16_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB30_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB30_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %esi +; X64-NEXT: movw $123, %ax +; X64-NEXT: testl %esi, %edx +; X64-NEXT: je .LBB30_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB30_3: # %if.then +; X64-NEXT: movzwl %cx, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_small_mask_xor_16_gpr_brz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_xor_16_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: movzwl %si, %esi +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB31_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB31_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %esi +; X86-NEXT: movw $123, %ax +; X86-NEXT: btl %ecx, %esi +; X86-NEXT: jb .LBB31_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: .LBB31_4: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_xor_16_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $7, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: movzwl %dx, %edx +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB31_1: # %atomicrmw.start +; X64-NEXT: # =>This 
Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB31_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movw $123, %ax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: jae .LBB31_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB31_3: # %if.then +; X64-NEXT: movzwl %cx, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 7 + %shl = shl nuw nsw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv5 = zext i16 %1 to i32 + %conv6 = zext i16 %0 to i32 + %shl7 = shl nuw nsw i32 1, %conv6 + %and = and i32 %shl7, %conv5 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv2 = zext i16 %0 to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv2 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_mask0_xor_16_gpr_brz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_xor_16_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB32_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB32_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: movw $123, %ax +; X86-NEXT: btl %ebx, %ecx +; X86-NEXT: jb .LBB32_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzwl %bx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: .LBB32_4: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_xor_16_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB32_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB32_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: movw $123, %ax +; X64-NEXT: btl %esi, %ecx +; X64-NEXT: jae .LBB32_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB32_3: # %if.then +; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + 
+if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_mask1_xor_16_gpr_brz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_xor_16_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB33_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB33_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %esi +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: andl $15, %edi +; X86-NEXT: movw $123, %ax +; X86-NEXT: btl %edi, %esi +; X86-NEXT: jb .LBB33_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: .LBB33_4: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_xor_16_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB33_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: xorl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB33_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movl %ecx, %esi +; X64-NEXT: andl $15, %esi +; X64-NEXT: movw $123, %ax +; X64-NEXT: btl %esi, %edx +; X64-NEXT: jae .LBB33_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB33_3: # %if.then +; X64-NEXT: movzwl %cx, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_brz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_xor_16_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB34_1: # 
%atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB34_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: movw $123, %ax +; X86-NEXT: testl %ecx, %esi +; X86-NEXT: jne .LBB34_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzwl %bx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: .LBB34_4: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_xor_16_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB34_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl %edx, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB34_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: movw $123, %ax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: je .LBB34_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB34_3: # %if.then +; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_blsi_xor_16_gpr_brz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_blsi_xor_16_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: negl %esi +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB35_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB35_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edi +; X86-NEXT: movw $123, %ax +; X86-NEXT: testl %edi, %esi +; X86-NEXT: jne .LBB35_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: .LBB35_4: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_xor_16_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB35_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop 
Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: xorl %ecx, %edx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB35_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movw $123, %ax +; X64-NEXT: testl %edx, %ecx +; X64-NEXT: je .LBB35_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB35_3: # %if.then +; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_and_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl $-2, %edi +; X86-NEXT: roll %cl, %edi +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB36_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB36_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl $-2, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %esi +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB36_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB36_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv5 = and i16 %1, %0 + ret i16 %conv5 +} + +define zeroext i16 @atomic_shl1_small_mask_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_and_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $7, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi 
+; X86-NEXT: movw $-2, %di +; X86-NEXT: rolw %cl, %di +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB37_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB37_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %si, %ecx +; X86-NEXT: andl %eax, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_and_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $7, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movw $-2, %si +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rolw %cl, %si +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB37_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB37_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %dx, %ecx +; X64-NEXT: andl %eax, %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 7 + %shl = shl nuw nsw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %and = and i16 %1, %shl + ret i16 %and +} + +define zeroext i16 @atomic_shl1_mask0_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_and_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movw $-2, %si +; X86-NEXT: rolw %cl, %si +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB38_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB38_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $1, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $cx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_and_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movw $-2, %dx +; X64-NEXT: rolw %cl, %dx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB38_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB38_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not 
monotonic, align 2 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %2 = trunc i32 %shl4 to i16 + %conv5 = and i16 %1, %2 + ret i16 %conv5 +} + +define zeroext i16 @atomic_shl1_mask1_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_and_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $-2, %esi +; X86-NEXT: roll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB39_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB39_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $cx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_and_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $-2, %edx +; X64-NEXT: roll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB39_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB39_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %2 = and i16 %c, 15 + %shl4 = shl nuw i16 1, %2 + %and = and i16 %1, %shl4 + ret i16 %and +} + +define zeroext i16 @atomic_shl1_mask01_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_and_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movw $-2, %di +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: rolw %cl, %di +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB40_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB40_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_and_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movw 
$-2, %r8w +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: rolw %cl, %r8w +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB40_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl %r8d, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB40_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %conv1 = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv7 = and i16 %1, %shl + ret i16 %conv7 +} + +define zeroext i16 @atomic_blsi_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_blsi_and_16_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %eax, %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: notl %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB41_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB41_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_and_16_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: notl %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB41_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB41_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %sub = sub i16 0, %c + %and = and i16 %sub, %c + %conv2 = xor i16 %and, -1 + %0 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv9 = and i16 %0, %and + ret i16 %conv9 +} + +define zeroext i16 @atomic_shl1_and_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_and_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl $-2, %edi +; X86-NEXT: roll %cl, %edi +; X86-NEXT: movzwl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB42_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB42_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: 
testl %ecx, %edx +; X86-NEXT: setne %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl $-2, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %esi +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB42_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB42_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: setne %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define zeroext i16 @atomic_shl1_small_mask_and_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_and_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movw $-2, %si +; X86-NEXT: rolw %cl, %si +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB43_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB43_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_and_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $7, %ecx +; X64-NEXT: movw $-2, %dx +; X64-NEXT: rolw %cl, %dx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB43_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB43_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 7 + %shl = shl nuw nsw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv5 = zext i16 %1 to i32 + %conv6 = zext i16 %0 to i32 + %shl7 = shl nuw nsw i32 1, %conv6 + %and = and i32 %shl7, %conv5 + %tobool = icmp ne i32 %and, 0 + %conv9 = zext i1 %tobool to i16 + ret i16 %conv9 +} + +define zeroext i16 
@atomic_shl1_mask0_and_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_and_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movw $-2, %si +; X86-NEXT: rolw %cl, %si +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB44_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB44_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_and_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movw $-2, %dx +; X64-NEXT: rolw %cl, %dx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB44_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB44_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define zeroext i16 @atomic_shl1_mask1_and_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_and_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $-2, %esi +; X86-NEXT: roll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB45_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB45_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: setb %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_and_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $-2, %edx +; X64-NEXT: roll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB45_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock 
cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB45_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: setb %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %2 = and i16 %c, 15 + %sh_prom = zext i16 %2 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define zeroext i16 @atomic_shl1_mask01_and_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_and_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl $-2, %edi +; X86-NEXT: roll %cl, %edi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB46_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB46_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %ecx, %esi +; X86-NEXT: setne %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_and_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl $-2, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: roll %cl, %esi +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB46_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB46_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: setne %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i16 + %conv1 = xor i16 %1, -1 + %2 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv8 = zext i1 %tobool to i16 + ret i16 %conv8 +} + +define zeroext i16 @atomic_blsi_and_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_blsi_and_16_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %eax, %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: notl %esi +; X86-NEXT: movzwl (%edx), 
%eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB47_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB47_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edx, %ecx +; X86-NEXT: setne %al +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_and_16_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: notl %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB47_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB47_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %edx, %ecx +; X64-NEXT: setne %al +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %0 = trunc i32 %and to i16 + %conv2 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %1 to i32 + %and8 = and i32 %and, %conv3 + %tobool = icmp ne i32 %and8, 0 + %conv10 = zext i1 %tobool to i16 + ret i16 %conv10 +} + +define zeroext i16 @atomic_shl1_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_and_16_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl $-2, %edi +; X86-NEXT: roll %cl, %edi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB48_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %bx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB48_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: testl %eax, %esi +; X86-NEXT: je .LBB48_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: jmp .LBB48_5 +; X86-NEXT: .LBB48_3: +; X86-NEXT: movw $123, %ax +; X86-NEXT: .LBB48_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_16_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl $-2, %esi +; X64-NEXT: roll %cl, %esi +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB48_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %r8d +; X64-NEXT: andl %esi, %r8d +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-NEXT: # kill: def $ax 
killed $ax def $eax +; X64-NEXT: jne .LBB48_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: testl %eax, %edx +; X64-NEXT: je .LBB48_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %cx, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB48_3: +; X64-NEXT: movw $123, %ax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_small_mask_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_and_16_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movw $-2, %si +; X86-NEXT: rolw %cl, %si +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB49_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB49_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB49_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: jmp .LBB49_5 +; X86-NEXT: .LBB49_3: +; X86-NEXT: movw $123, %ax +; X86-NEXT: .LBB49_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_and_16_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $7, %ecx +; X64-NEXT: movw $-2, %dx +; X64-NEXT: rolw %cl, %dx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB49_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB49_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB49_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %cx, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB49_3: +; X64-NEXT: movw $123, %ax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 7 + %shl = shl nuw nsw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv5 = zext i16 %1 to i32 + %conv6 = zext i16 %0 to i32 + %shl7 = shl nuw nsw i32 1, %conv6 + %and = and i32 %shl7, %conv5 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i16 %0 to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv2 + %2 = load i16, ptr %arrayidx, align 
2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_mask0_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_and_16_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movw $-2, %si +; X86-NEXT: rolw %cl, %si +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB50_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB50_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB50_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: jmp .LBB50_5 +; X86-NEXT: .LBB50_3: +; X86-NEXT: movw $123, %ax +; X86-NEXT: .LBB50_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_and_16_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movw $-2, %dx +; X64-NEXT: rolw %cl, %dx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB50_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB50_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB50_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %cx, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB50_3: +; X64-NEXT: movw $123, %ax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_mask1_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_and_16_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $-2, %esi +; X86-NEXT: roll %cl, %esi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB51_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %di, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB51_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: 
movzwl %ax, %eax +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: andl $15, %esi +; X86-NEXT: btl %esi, %eax +; X86-NEXT: jae .LBB51_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: jmp .LBB51_5 +; X86-NEXT: .LBB51_3: +; X86-NEXT: movw $123, %ax +; X86-NEXT: .LBB51_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_and_16_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $-2, %edx +; X64-NEXT: roll %cl, %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB51_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB51_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: andl $15, %edx +; X64-NEXT: btl %edx, %eax +; X64-NEXT: jae .LBB51_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %cx, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB51_3: +; X64-NEXT: movw $123, %ax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %2 = and i16 %c, 15 + %sh_prom = zext i16 %2 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %3 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_mask01_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_and_16_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl $-2, %edi +; X86-NEXT: roll %cl, %edi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB52_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB52_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: testl %eax, %esi +; X86-NEXT: je .LBB52_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %bx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: jmp .LBB52_5 +; X86-NEXT: .LBB52_3: +; X86-NEXT: movw $123, %ax +; X86-NEXT: .LBB52_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_and_16_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl $-2, %r8d +; 
X64-NEXT: roll %cl, %r8d +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB52_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl %r8d, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB52_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: testl %eax, %edx +; X64-NEXT: je .LBB52_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB52_3: +; X64-NEXT: movw $123, %ax +; X64-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i16 + %conv1 = xor i16 %1, -1 + %2 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %3 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_blsi_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind { +; X86-LABEL: atomic_blsi_and_16_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: negl %esi +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: notl %edi +; X86-NEXT: movzwl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB53_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: andl %edi, %ebx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %bx, (%edx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB53_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: testl %eax, %esi +; X86-NEXT: je .LBB53_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NEXT: jmp .LBB53_5 +; X86-NEXT: .LBB53_3: +; X86-NEXT: movw $123, %ax +; X86-NEXT: .LBB53_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_and_16_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: notl %edx +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB53_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %r8d +; X64-NEXT: andl %edx, %r8d +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB53_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: testl %eax, %ecx +; X64-NEXT: je .LBB53_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB53_3: +; X64-NEXT: movw $123, %ax +; X64-NEXT: retq +entry: + %conv = zext i16 %c to i32 + 
%sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %0 = trunc i32 %and to i16 + %conv2 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %1 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_or_16_const_val(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_or_16_const_val: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $4, (%ecx) +; X86-NEXT: setb %al +; X86-NEXT: shll $4, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_16_const_val: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $4, (%rdi) +; X64-NEXT: setb %al +; X64-NEXT: shll $4, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + ret i16 %1 +} + +define zeroext i16 @atomic_shl1_or_16_const_valnz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_or_16_const_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB55_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: orl $16, %edx +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB55_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_16_const_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB55_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl $16, %ecx +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB55_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: shrl $4, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + ret i16 %.lobit +} + +define zeroext i16 @atomic_shl1_or_16_const_brnz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_or_16_const_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: lock btsw $4, (%eax) +; X86-NEXT: jae .LBB56_1 +; X86-NEXT: # %bb.2: # %if.then +; X86-NEXT: movzwl 8(%eax), %eax +; X86-NEXT: retl +; X86-NEXT: .LBB56_1: +; X86-NEXT: movw $123, %ax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_16_const_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: lock btsw $4, (%rdi) +; X64-NEXT: jae .LBB56_1 +; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movzwl 8(%rdi), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB56_1: +; X64-NEXT: movw $123, %ax +; X64-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = 
and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define zeroext i16 @atomic_shl1_and_16_const_val(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_16_const_val: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $4, (%ecx) +; X86-NEXT: setb %al +; X86-NEXT: shll $4, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_16_const_val: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $4, (%rdi) +; X64-NEXT: setb %al +; X64-NEXT: shll $4, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = and i16 %0, 16 + ret i16 %1 +} + +define zeroext i16 @atomic_shl1_and_16_const_valz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_16_const_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB58_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $65519, %edx # imm = 0xFFEF +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: jne .LBB58_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb $16, %al +; X86-NEXT: sete %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_16_const_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB58_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $65519, %ecx # imm = 0xFFEF +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NEXT: # kill: def $ax killed $ax def $eax +; X64-NEXT: jne .LBB58_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb $16, %al +; X64-NEXT: sete %cl +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + %conv1 = xor i16 %.lobit, 1 + ret i16 %conv1 +} + +define zeroext i16 @atomic_shl1_and_16_const_brz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_16_const_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: lock btrw $4, (%ecx) +; X86-NEXT: movw $123, %ax +; X86-NEXT: jae .LBB59_1 +; X86-NEXT: # %bb.2: # %return +; X86-NEXT: retl +; X86-NEXT: .LBB59_1: # %if.then +; X86-NEXT: movzwl 8(%ecx), %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_16_const_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: lock btrw $4, (%rdi) +; X64-NEXT: movw $123, %ax +; X64-NEXT: jae .LBB59_1 +; X64-NEXT: # %bb.2: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB59_1: # %if.then +; X64-NEXT: movzwl 8(%rdi), %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr 
inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i32 @atomic_shl1_or_32_gpr_val(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_or_32_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB60_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NEXT: jne .LBB60_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_32_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB60_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB60_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + ret i32 %and +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_val(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $15, %cl +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB61_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NEXT: jne .LBB61_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB61_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB61_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_shl1_mask0_or_32_gpr_val(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_or_32_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB62_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop 
Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB62_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $1, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_or_32_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB62_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB62_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: andl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %1, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask1_or_32_gpr_val(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_or_32_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB63_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB63_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $1, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: andl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_or_32_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB63_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB63_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: andl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %shl1 = shl nuw i32 1, %1 + %and = and i32 %0, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask01_or_32_gpr_val(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_or_32_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB64_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NEXT: jne .LBB64_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; 
X64-LABEL: atomic_shl1_mask01_or_32_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB64_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB64_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_blsi_or_32_gpr_val(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_or_32_gpr_val: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %eax, %ecx +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB65_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: lock cmpxchgl %esi, (%edx) +; X86-NEXT: jne .LBB65_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_or_32_gpr_val: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB65_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: orl %ecx, %edx +; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: jne .LBB65_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: andl %ecx, %eax +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + ret i32 %and3 +} + +define i32 @atomic_shl1_or_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_or_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB66_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB66_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB66_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB66_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = 
atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB67_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB67_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB67_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB67_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_or_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_or_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB68_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB68_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_or_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB68_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB68_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %c + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_or_32_gpr_valz(ptr %v, i32 %c) 
nounwind { +; X86-LABEL: atomic_shl1_mask1_or_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB69_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB69_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_or_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB69_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB69_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %2 = xor i32 %0, -1 + %3 = lshr i32 %2, %1 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_or_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_or_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $31, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB70_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB70_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_or_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $31, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB70_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB70_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_or_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_or_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: negl %edx 
+; X86-NEXT: andl %eax, %edx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB71_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-NEXT: jne .LBB71_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testl %edx, %eax +; X86-NEXT: sete %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_or_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %edx +; X64-NEXT: negl %edx +; X64-NEXT: andl %esi, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB71_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB71_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testl %edx, %eax +; X64-NEXT: sete %cl +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_or_32_gpr_valnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_or_32_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB72_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB72_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setb %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_32_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB72_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB72_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setb %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = lshr i32 %0, %c + %lnot.ext = and i32 %1, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_valnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB73_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB73_1 +; 
X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setb %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB73_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB73_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setb %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_or_32_gpr_valnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_or_32_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB74_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB74_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setb %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_or_32_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB74_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB74_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setb %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_or_32_gpr_valnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_or_32_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB75_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB75_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setb %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_or_32_gpr_valnz: +; X64: # %bb.0: # %entry +; 
X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB75_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB75_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setb %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %2 = lshr i32 %0, %1 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_or_32_gpr_valnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_or_32_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $31, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB76_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB76_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setb %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_or_32_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $31, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB76_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB76_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setb %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_or_32_gpr_valnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_or_32_gpr_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: negl %edx +; X86-NEXT: andl %eax, %edx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB77_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-NEXT: jne .LBB77_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testl %edx, %eax +; X86-NEXT: setne %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_or_32_gpr_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %edx +; X64-NEXT: negl %edx +; X64-NEXT: andl %esi, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB77_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock 
cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB77_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testl %edx, %eax +; X64-NEXT: setne %cl +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool = icmp ne i32 %and3, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_or_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_or_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB78_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB78_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB78_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB78_5 +; X86-NEXT: .LBB78_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB78_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB78_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB78_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB78_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB78_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB79_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB79_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB79_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB79_5 +; X86-NEXT: .LBB79_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB79_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: 
popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB79_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB79_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB79_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB79_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_or_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_or_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB80_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB80_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB80_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB80_5 +; X86-NEXT: .LBB80_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB80_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_or_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB80_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB80_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB80_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB80_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 
@atomic_shl1_mask1_or_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_or_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB81_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB81_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB81_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB81_5 +; X86-NEXT: .LBB81_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB81_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_or_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB81_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB81_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB81_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB81_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_or_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_or_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB82_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB82_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB82_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB82_5 +; X86-NEXT: .LBB82_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB82_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_or_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB82_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) 
+; X64-NEXT: jne .LBB82_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB82_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB82_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_or_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_or_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: negl %esi +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB83_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB83_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB83_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB83_5 +; X86-NEXT: .LBB83_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB83_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_or_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB83_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: orl %ecx, %edx +; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: jne .LBB83_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %ecx, %eax +; X64-NEXT: je .LBB83_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB83_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_or_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_or_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB84_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: orl %edi, %edx 
+; X86-NEXT: lock cmpxchgl %edx, (%esi) +; X86-NEXT: jne .LBB84_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx +; X86-NEXT: testl %edi, %eax +; X86-NEXT: jne .LBB84_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB84_4: # %return +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB84_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: orl %esi, %edx +; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: jne .LBB84_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx +; X64-NEXT: testl %esi, %eax +; X64-NEXT: je .LBB84_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB84_3: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB85_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: lock cmpxchgl %edx, (%esi) +; X86-NEXT: jne .LBB85_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx +; X86-NEXT: testl %edi, %eax +; X86-NEXT: jne .LBB85_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB85_4: # %return +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB85_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: orl %esi, %edx +; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: jne .LBB85_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx +; X64-NEXT: testl %esi, %eax +; X64-NEXT: je .LBB85_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB85_3: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl 
monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_or_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_or_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB86_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NEXT: jne .LBB86_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jb .LBB86_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB86_4: # %return +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_or_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB86_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB86_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB86_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB86_3: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_or_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_or_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB87_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NEXT: jne .LBB87_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jb .LBB87_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB87_4: # %return +; X86-NEXT: movl %edx, %eax +; 
X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_or_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB87_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB87_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB87_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB87_3: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_or_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_or_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB88_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: lock cmpxchgl %edx, (%esi) +; X86-NEXT: jne .LBB88_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx +; X86-NEXT: testl %edi, %eax +; X86-NEXT: jne .LBB88_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB88_4: # %return +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_or_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB88_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: orl %esi, %edx +; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: jne .LBB88_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx +; X64-NEXT: testl %esi, %eax +; X64-NEXT: je .LBB88_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB88_3: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, 
%if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_or_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_or_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: negl %edi +; X86-NEXT: andl %edx, %edi +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB89_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NEXT: jne .LBB89_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %ecx +; X86-NEXT: testl %edi, %eax +; X86-NEXT: jne .LBB89_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%edx,4), %ecx +; X86-NEXT: .LBB89_4: # %return +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_or_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %edx +; X64-NEXT: negl %edx +; X64-NEXT: andl %esi, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB89_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB89_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %ecx +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB89_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB89_3: # %if.then +; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl (%rdi,%rax,4), %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_or_32_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB90_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB90_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB90_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB90_5 +; X86-NEXT: .LBB90_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB90_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_or_32_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB90_1: # %atomicrmw.start +; X64-NEXT: # =>This 
Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB90_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB90_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB90_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB91_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB91_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB91_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB91_5 +; X86-NEXT: .LBB91_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB91_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB91_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB91_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB91_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB91_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl 
(%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB92_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB92_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB92_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB92_5 +; X86-NEXT: .LBB92_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB92_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB92_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB92_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB92_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB92_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB93_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB93_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB93_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB93_5 +; X86-NEXT: .LBB93_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB93_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB93_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB93_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB93_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB93_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %shl = shl 
nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB94_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB94_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB94_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB94_5 +; X86-NEXT: .LBB94_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB94_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB94_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB94_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB94_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB94_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_or_32_gpr_brnz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: negl %esi +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB95_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB95_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: testl %esi, %eax +; X86-NEXT: je .LBB95_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB95_5 +; X86-NEXT: 
.LBB95_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB95_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_or_32_gpr_brnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB95_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %edx +; X64-NEXT: orl %ecx, %edx +; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: jne .LBB95_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: testl %ecx, %eax +; X64-NEXT: je .LBB95_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB95_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_and_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB96_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB96_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB96_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB96_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_and_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB97_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi 
+; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB97_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_and_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB97_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB97_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_and_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_and_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB98_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB98_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_and_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB98_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB98_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %c + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_and_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_and_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB99_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB99_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; 
X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_and_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB99_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB99_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %2 = xor i32 %0, -1 + %3 = lshr i32 %2, %1 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_and_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_and_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $31, %ecx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB100_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB100_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_and_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $31, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB100_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB100_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_and_32_gpr_valz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_and_32_gpr_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: negl %edx +; X86-NEXT: andl %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock andl %edx, (%ecx) +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_and_32_gpr_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock andl %ecx, (%rdi) +; X64-NEXT: sete %al +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw and ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_and_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: 
atomic_shl1_and_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: je .LBB102_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB102_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: je .LBB102_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB102_2: # %return +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_and_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: je .LBB103_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB103_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_and_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: je .LBB103_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB103_2: # %return +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_and_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_and_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB104_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB104_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB104_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: 
movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB104_5 +; X86-NEXT: .LBB104_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB104_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_and_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB104_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB104_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB104_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB104_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_and_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_and_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl (%edx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB105_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %esi, %edi +; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: jne .LBB105_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jae .LBB105_3 +; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: jmp .LBB105_5 +; X86-NEXT: .LBB105_3: +; X86-NEXT: movl $123, %eax +; X86-NEXT: .LBB105_5: # %return +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_and_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB105_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB105_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB105_3 +; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB105_3: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr 
%arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_and_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_and_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: je .LBB106_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB106_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_and_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: je .LBB106_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB106_2: # %return +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_and_32_gpr_br(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_and_32_gpr_br: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: je .LBB107_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB107_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_and_32_gpr_br: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: je .LBB107_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB107_2: # %return +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw and ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_and_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: jne .LBB108_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB108_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_gpr_brz: 
+; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: jne .LBB108_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB108_2: # %return +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_small_mask_and_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movl $1, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: jne .LBB109_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB109_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_small_mask_and_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andl $15, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: jne .LBB109_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB109_2: # %return +; X64-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_and_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask0_and_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB110_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %edx, %edi +; X86-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NEXT: jne .LBB110_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jb .LBB110_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB110_4: # %return +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask0_and_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB110_1: # 
%atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB110_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB110_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB110_3: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_and_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask1_and_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB111_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: andl %edx, %edi +; X86-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NEXT: jne .LBB111_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: jb .LBB111_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB111_4: # %return +; X86-NEXT: movl %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask1_and_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB111_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %esi +; X64-NEXT: andl %edx, %esi +; X64-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NEXT: jne .LBB111_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx +; X64-NEXT: btl %ecx, %eax +; X64-NEXT: jae .LBB111_3 +; X64-NEXT: # %bb.4: # %return +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +; X64-NEXT: .LBB111_3: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_and_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_shl1_mask01_and_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl $1, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: jne .LBB112_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB112_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_mask01_and_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl $1, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: jne .LBB112_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB112_2: # %return +; X64-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_and_32_gpr_brz(ptr %v, i32 %c) nounwind { +; X86-LABEL: atomic_blsi_and_32_gpr_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: lock andl %eax, (%edx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: jne .LBB113_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB113_2: # %return +; X86-NEXT: retl +; +; X64-LABEL: atomic_blsi_and_32_gpr_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %esi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: lock andl %eax, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: jne .LBB113_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl %esi, %eax +; X64-NEXT: movl (%rdi,%rax,4), %eax +; X64-NEXT: .LBB113_2: # %return +; X64-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw and ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_xor_32_const_val(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_xor_32_const_val: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcl $4, (%ecx) +; X86-NEXT: setb %al +; X86-NEXT: shll $4, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_32_const_val: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcl $4, (%rdi) +; X64-NEXT: setb %al +; X64-NEXT: shll $4, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 16 + ret i32 %and +} + +define i32 @atomic_shl1_xor_32_const_valz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_xor_32_const_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 
+; X86-NEXT: .LBB115_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: xorl $16, %edx +; X86-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NEXT: jne .LBB115_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb $16, %al +; X86-NEXT: sete %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_32_const_valz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB115_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $16, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB115_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb $16, %al +; X64-NEXT: sete %cl +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + %lnot.ext = xor i32 %and.lobit, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_xor_32_const_valnz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_xor_32_const_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB116_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: xorl $16, %edx +; X86-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NEXT: jne .LBB116_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_xor_32_const_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB116_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: xorl $16, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB116_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: shrl $4, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + ret i32 %and.lobit +} + +define i32 @atomic_shl1_and_32_const_val(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_32_const_val: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrl $4, (%ecx) +; X86-NEXT: setb %al +; X86-NEXT: shll $4, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_const_val: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrl $4, (%rdi) +; X64-NEXT: setb %al +; X64-NEXT: shll $4, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 16 + ret i32 %and +} + +define i32 @atomic_shl1_and_32_const_valz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_32_const_valz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB118_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $-17, %edx +; X86-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NEXT: jne .LBB118_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb $16, %al +; X86-NEXT: sete %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_const_valz: +; X64: # %bb.0: # %entry +; 
X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB118_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $-17, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB118_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb $16, %al +; X64-NEXT: sete %cl +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + %lnot.ext = xor i32 %and.lobit, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_and_32_const_valnz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_32_const_valnz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB119_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $-17, %edx +; X86-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NEXT: jne .LBB119_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_const_valnz: +; X64: # %bb.0: # %entry +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB119_1: # %atomicrmw.start +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $-17, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NEXT: jne .LBB119_1 +; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: shrl $4, %eax +; X64-NEXT: andl $1, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + ret i32 %and.lobit +} + +define i32 @atomic_shl1_and_32_const_br(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_32_const_br: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: lock btrl $4, (%eax) +; X86-NEXT: jae .LBB120_1 +; X86-NEXT: # %bb.2: # %if.then +; X86-NEXT: movl 16(%eax), %eax +; X86-NEXT: retl +; X86-NEXT: .LBB120_1: +; X86-NEXT: movl $123, %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_const_br: +; X64: # %bb.0: # %entry +; X64-NEXT: lock btrl $4, (%rdi) +; X64-NEXT: jae .LBB120_1 +; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl 16(%rdi), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB120_1: +; X64-NEXT: movl $123, %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_const_brz(ptr %v) nounwind { +; X86-LABEL: atomic_shl1_and_32_const_brz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: lock btrl $4, (%ecx) +; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB121_1 +; X86-NEXT: # %bb.2: # %return +; X86-NEXT: retl +; X86-NEXT: .LBB121_1: # %if.then +; X86-NEXT: movl 16(%ecx), %eax +; X86-NEXT: retl +; +; X64-LABEL: atomic_shl1_and_32_const_brz: +; X64: # %bb.0: # %entry +; X64-NEXT: lock btrl $4, (%rdi) +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB121_1 +; X64-NEXT: # %bb.2: # %return +; X64-NEXT: retq +; X64-NEXT: .LBB121_1: # %if.then +; 
X64-NEXT: movl 16(%rdi), %eax +; X64-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +}
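+
+; A rough source-level sketch of the pattern exercised by the *_br/_brz/_brnz
+; tests above (an illustrative assumption, not the input the checks were
+; generated from):
+;
+;   unsigned bit_test_and_branch(unsigned *v, unsigned c) {
+;     unsigned mask = 1u << c;
+;     /* atomicrmw and ... monotonic */
+;     unsigned old = __atomic_fetch_and(v, mask, __ATOMIC_RELAXED);
+;     return (old & mask) ? v[c] : 123;
+;   }
+;
+; Some mask shapes above lower to a single `lock and`/`lock btr` followed by a
+; flag branch, while others keep the cmpxchg loop; the CHECK lines pin down
+; which form each variant currently gets.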